core/sql/ustat/hs_globals.cpp - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 /* -*-C++-*-
  *****************************************************************************
  *
  * File:         hs_globals.C
  * Description:  For managing globals.
  * Created:      03/25/96
  * Language:     C++
  *
  *
  *
  *
  *****************************************************************************
  */

 #define HS_FILE "hs_globals"

 #define   SQLPARSERGLOBALS_FLAGS
 #include "SqlParserGlobalsCmn.h"

 #include <math.h>
 #include <stdlib.h>
 #include <time.h>
 #include <limits.h>
 #include <memory>
 #include "ComDiags.h"
 #include "hs_globals.h"
 #include "hs_cli.h"
 #include "hs_la.h"
 #include "hs_auto.h"
 #include "hs_parser.h"
 #include "hs_faststats.h"
 #include "ComCextdecs.h"
 #include "NAString.h"
 #include "wstr.h"
 #include "Collections.h"
 #include "NumericType.h"
 #include "exp_datetime.h"
 #include "DatetimeType.h"
 #include "SchemaDB.h"
 #include "CompException.h"
 #include "SQLTypeDefs.h"
 #include "csconvert.h"
 #include "exp_clause_derived.h"  // convDoIt
 #include "ExSqlComp.h" // for NAExecTrans()
 #include "sql_id.h"
 #include "parser.h"
 #include "ComUser.h"
 #include "CmpSeabaseDDL.h"
 #include "PrivMgrDefs.h"
 #include "PrivMgrComponentPrivileges.h"
 #include "PrivMgrCommands.h"
 #include "CmpDDLCatErrorCodes.h"
 #include "HBaseClient_JNI.h"  // to get HBC_ERROR_ROWCOUNT_EST_EXCEPTION

 #include <sys/stat.h>
 #include <sys/types.h>
 //#include <fcntl.h>

 #include <sys/types.h>
 #include <sys/dir.h>
 #include <sys/param.h>
 #include <unistd.h>
 #include <sys/statvfs.h>
 #include "NAClusterInfo.h"
 #include <errno.h>
 #include <fcntl.h>


 #include "Globals.h"
 #include "ExpHbaseInterface.h"


 #define srand48 srand
 #define lrand48 rand
 #define MathPow(op1, op2, err) pow(op1, op2)
 #define MathLog10(op, err) log10(op)

 template <class T>
 Int64 placeWidePivot(T* sortArr, Int64 lowInx, Int64 highInx, Int64 pivotInx,
                      Int64& pivotWidth);
 template <class T>
 void quicksort(T *sortArr, Int64 lowInx, Int64 highInx);

 bool isInternalSortType(HSColumnStruct &col);

 void formatFixedNumeric(Int64 value, Lng32 scale, char* buffer);

 void getPreviousUECRatios(HSColGroupStruct *groupList);

 THREAD_P float HSGlobalsClass::ISMemPercentage_ = 0;

 Lng32 doSort(HSColGroupStruct *group);

 THREAD_P NAString* HSGlobalsClass::defaultHiveCatName  = NULL;
 THREAD_P NAString* HSGlobalsClass::defaultHbaseCatName = NULL;

 Lng32 setBufferValue(MCWrapper& value, const HSColGroupStruct *mgroup, HSDataBuffer &boundary);

 template <class T>
 void createHistogram(HSColGroupStruct *group, Lng32 numIntervals, Int64 estRowCount, NABoolean usingSample, T* dummyPtr);

 static Lng32 create_I(NAString& sampTblName);

 static Lng32 drop_I(NAString& sampTblName);

 //
 // Initialize the GLOBAL instances of ISFixedChar and ISVarChar values.
 // See the "as lightweight as possible" comments in hs_globals.h
 //
 THREAD_P Int32 ISFixedChar::length = 0;
 THREAD_P NABoolean ISFixedChar::caseInsensitive = FALSE;
 THREAD_P CharInfo::Collation ISFixedChar::colCollation = CharInfo::DefaultCollation;
 THREAD_P CharInfo::CharSet ISFixedChar::charset = CharInfo::UnknownCharSet;

 THREAD_P Int32 ISVarChar::declaredLength = 0;
 THREAD_P NABoolean ISVarChar::caseInsensitive = FALSE;
 THREAD_P CharInfo::Collation ISVarChar::colCollation  = CharInfo::DefaultCollation;
 THREAD_P CharInfo::CharSet ISVarChar::charset = CharInfo::UnknownCharSet;

 // Initialize the static member hash table that stores jit-log threshold values.
 // It will be allocated in the HSGlobalsClass ctor for the first execution of
 // an Update Stats statement.
 THREAD_P JitLogHashType* HSGlobalsClass::jitLogThresholdHash = NULL;

 // Global variables for maintaining buffers used by Collated_cmp()
 THREAD_P Int32       lengthOfSortBufrs = 0;
 THREAD_P char *    sortBuffer1 = NULL;
 THREAD_P char *    sortBuffer2 = NULL;

 // Initialize the GLOBAL instances for MCWrapper
 THREAD_P MCIterator** MCWrapper::cols_ = NULL;
 THREAD_P MCIterator** MCWrapper::allCols_ = NULL;
 THREAD_P Int32 MCWrapper::numOfCols_ = 0;
 THREAD_P Int32 MCWrapper::numOfAllCols_ = 0;
 THREAD_P Int32 MCWrapper::nullCount_ = 0;

 // Names (used for logging) corresponding to SortState enum values.
 // Must match the enum.
 const char* SortStateName[] =
   {
     "UNPROCESSED",
     "PENDING",
     "PROCESSED",
     "DONT_TRY",
     "SKIP",
     "NO_STATS"
   };

 extern THREAD_P NAString* ius_where_condition_text;

 // This is from exp\exp_conv.cpp. We copied it here rather than try to figure
 // out the linkage issues necessary to be able to use the existing one.
 static short convFloat64ToAscii(char *target,
                                 Lng32 targetLen,
                                 double source,
                                 // maximum # of fraction digits
                                 Lng32 digits,
                                 char * varCharLen,
                                 Lng32 varCharLenSize,
                                 NABoolean leftPad);

 // If an NAHeap is given a request for more than 128MB minus a few bookkeeping
 // bytes, it will trigger an assert failure, even if failureIsFatal is false.
 // NOTE: This and all references to it can be removed once the fix to check this
 //       in NAHeap has been released.
 #define MAX_NAHEAP_SINGLE_ALLOC ((128 * 1024 * 1024) - 1024)

 // The arrSz argument passed to this macro should not have side effects; it is
 // evaluated twice in the macro expansion.
 #define newObjArr(clsName, arrSz)                      \
     (sizeof(clsName)*(arrSz) > MAX_NAHEAP_SINGLE_ALLOC \
         ? 0                                            \
         : new clsName[(size_t)arrSz])
 #define delObjArr(ptr, clsName) {delete [] (clsName*)(ptr);}
 #ifdef _TEST_ALLOC_FAILURE
 #define newObjArrX(clsName, arrSz, count)               \
     ((sizeof(clsName)*(arrSz) > MAX_NAHEAP_SINGLE_ALLOC \
               || HSColGroupStruct::allocFilter(count))    \
         ? 0                                             \
         : new clsName[(size_t)arrSz])
 #endif

 // The elemCount argument passed to this macro should not be an expression with
 // side effects; it will be evaluated twice in the macro expansion. This macro
 // should only be invoked from the HSGlobalsClass scope, because it uses a
 // member variable (allocCount) when _TEST_ALLOC_FAILURE is defined. The
 // _TEST_ALLOC_FAILURE version simulates memory allocation failure for testing
 // purposes if allocCount equals one of a set of values specified via a CQD.
 #ifdef _TEST_ALLOC_FAILURE
 #define newBuiltinArr(elemType, elemCount)                         \
     ( HSColGroupStruct::allocFilter(allocCount++) ||                 \
             sizeof(elemType)*(elemCount) > MAX_NAHEAP_SINGLE_ALLOC \
         ? 0                                                        \
         : new (STMTHEAP) elemType[elemCount])
 #else
 #define newBuiltinArr(elemType, elemCount)                  \
     (sizeof(elemType)*(elemCount) > MAX_NAHEAP_SINGLE_ALLOC \
         ? 0                                                 \
         : STMTHEAP->allocateMemory(sizeof(elemType)*(elemCount), FALSE))
      //        : new (STMTHEAP, FALSE) elemType[elemCount])
 #endif

 void ISFixedChar::fail(const char* opName, Lng32 line)
 {
   HSLogMan *LM = HSLogMan::Instance();
   LM->Log("INTERNAL ERROR (ISFixedChar):");
   sprintf(LM->msg, "Undefined operator type %s", opName);
   LM->Log(LM->msg);
   *CmpCommon::diags() << DgSqlCode(-UERR_GENERIC_ERROR)
                       << DgString0("ISFixedChar")
                       << DgString1("N/A")
                       << DgString2(LM->msg);
   throw CmpInternalException("failure in ISFixedChar",
                              __FILE__, line);
 }

 // Compare this object to rhs, returning negative value if less, 0 if equal,
 // and positive value if greater.
 Int32 ISFixedChar::compare(const ISFixedChar &rhs)
 {
   // Note that case insensitive is not supported with non-binary collation.
   if (CollationInfo::isSystemCollation(colCollation))
       return Collated_cmp(content, rhs.content, length, colCollation,
                           sortBuffer1, sortBuffer2);
   // UCS2 cols not supported in MODE_SPECIAL_1 and do not support case insensitivity.
   if (!caseInsensitive)
     {
       if (charset != CharInfo::UNICODE)
         return memcmp(content, rhs.content, length);
       else
         return na_wcsnncmp((const wchar_t *)content, length / sizeof(NAWchar),
                             (const wchar_t *)rhs.content, length / sizeof(NAWchar));
     }
   else
     return hs_strncasecmp(content, rhs.content, length);
 }

 /*************************************************/
 /* METHOD:   setupMCColumnIterator               */
 /* PURPOSE:  sets up the iterator for a given    */
 /*           MC column for the MCrapper class    */
 /* PARAMS:   group: group represeting an MC col  */
 /*           iter: all MC columns                */
 /*           iter2: MC not all-null cols         */
 /*           currentLoc: index in all cols       */
 /*           notNullLoc: index in not null cols  */
 /*           numRows: num of rows to process     */
 /*                                               */
 /* RETCODE:  0 for success and -1 for failure    */
 /*************************************************/

 Lng32 MCWrapper::setupMCColumnIterator (HSColGroupStruct *group, MCIterator** iter, MCIterator** iter2,
                                         Int32 &currentLoc, Int32 &notNullLoc, Int32 numRows)
 {
   Lng32 retcode = 0;
   char errtxt[100]={0};
   HSErrorCatcher errorCatcher(retcode, - UERR_INTERNAL_ERROR, errtxt, TRUE);

   HSLogMan *LM = HSLogMan::Instance();

   // declared early to make the compiler happy
   MCFixedCharIterator* MCFcharIter;
   MCVarCharIterator* MCVcharIter;

   // not nullable column if its type is not nullable or if nullable but has
   // no null values
   NABoolean noNulls = (!group->colSet[0].nullflag || (group->nullCount == 0));

   switch (group->ISdatatype)
     {
       case REC_BIN8_SIGNED:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<char>((char *)group->mcis_data);
         break;

       case REC_BOOLEAN:
       case REC_BIN8_UNSIGNED:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<unsigned char>((unsigned char *)group->mcis_data);
         break;

       case REC_BIN16_SIGNED:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<short>((short *)group->mcis_data);
         break;

       case REC_BIN16_UNSIGNED:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<unsigned short>((unsigned short *)group->mcis_data);
         break;

       case REC_BIN32_SIGNED:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<Int32>((Int32 *)group->mcis_data);
         break;

       case REC_BIN32_UNSIGNED:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<UInt32>((UInt32 *)group->mcis_data);
         break;

       case REC_BIN64_SIGNED:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<Int64>((Int64 *)group->mcis_data);
         break;

       case REC_BIN64_UNSIGNED:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<UInt64>((UInt64 *)group->mcis_data);
         break;

       case REC_IEEE_FLOAT32:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<float>((float *)group->mcis_data);
         break;

       case REC_IEEE_FLOAT64:
         iter[currentLoc] = new (STMTHEAP) MCNonCharIterator<double>((double *)group->mcis_data);
         break;

       case REC_BYTE_F_ASCII:
       case REC_BYTE_F_DOUBLE:
         iter[currentLoc] = new (STMTHEAP) MCFixedCharIterator((char*)group->strData, group->ISlength);

         MCFcharIter = (MCFixedCharIterator*)(iter[currentLoc]);

         MCFcharIter->caseInsensitive = (group->colSet[0].caseInsensitive == 1);
         MCFcharIter->colCollation = group->colSet[0].colCollation;
         MCFcharIter->charset = group->colSet[0].charset;

         break;

       case REC_BYTE_V_ASCII:
       case REC_BYTE_V_DOUBLE:
         iter[currentLoc] = new (STMTHEAP) MCVarCharIterator((char*)group->strData);

         MCVcharIter = (MCVarCharIterator*)iter[currentLoc];

         // set row length
         MCVcharIter->rowLength = group->ISlength + VARCHAR_LEN_FIELD_IN_BYTES + (group->ISlength % 2);

         MCVcharIter->caseInsensitive = (group->colSet[0].caseInsensitive == 1);
         MCVcharIter->colCollation = group->colSet[0].colCollation;
         MCVcharIter->charset = group->colSet[0].charset;

         break;

       default:
         sprintf(errtxt, "MCsetIterator: unknown type %d", group->ISdatatype);
         sprintf(LM->msg, "MC INTERNAL ERROR: %s", errtxt);
         LM->Log(LM->msg);
         retcode = -1;
         HSHandleError(retcode);
         break;
     }

     // set the bit map null indicator
     if (iter[currentLoc] && group->mcis_nullIndBitMap)
       iter[currentLoc]->nullInd = group->mcis_nullIndBitMap;

     iter[currentLoc]->ISdatatype = group->ISdatatype;

     if (group->nullCount == numRows)
     {
        if (LM->LogNeeded())
        {
           sprintf(LM->msg, "\tMC: in setupMCColumnIterator, skiping column (%s), all values are null",
                   group->colSet[0].colname->data());
           LM->Log(LM->msg);
        }
     }
     else
     {
       iter2[notNullLoc] = iter[currentLoc];
       iter2[notNullLoc++]->nullInd = iter[currentLoc]->nullInd;

       if ((LM->LogNeeded()) && group->colSet[0].nullflag && (group->nullCount == 0))
       {
          sprintf(LM->msg, "\tMC: column (%s) is nullable but has no nulls", group->colSet[0].colname->data());
          LM->Log(LM->msg);
       }
     }

     return retcode;
 }

 /***********************************************/
 /* METHOD:   setupMCIterators                  */
 /* PURPOSE:  setup all MCWrapper iterators for */
 /*           all columns used by the MC        */
 /* PARAMS:   mgroup - the MC group to process  */
 /*           numRows - number of rows to       */
 /*                     process                 */
 /* RETCODE:  none                              */
 /***********************************************/

 void MCWrapper::setupMCIterators(HSColGroupStruct *mgroup, Int32 numRows)
 {
     HSColumnStruct   *col;
     HSColGroupStruct *sgroup;
     Int32 nonNullCols = 0;

     HSGlobalsClass *hs_globals = GetHSContext();

     Int32 colCount = mgroup->colCount;

     allCols_ = new (STMTHEAP) MCIterator*[colCount];
     cols_ = new (STMTHEAP) MCIterator*[colCount];

     for (Int32 j=0; j<colCount; j++)
     {
        col = &mgroup->colSet[j];
        sgroup = hs_globals->findGroup(col->colnum);
        setupMCColumnIterator (sgroup, allCols_, cols_, j, nonNullCols, numRows);
     }

     MCWrapper::numOfCols_ = nonNullCols;
     MCWrapper::numOfAllCols_ = colCount;
 }


 /***********************************************/
 /* METHOD:   checkAllColsHaveSameNumOfRows     */
 /* PURPOSE:  make sure all columns of the MC   */
 /*           have read in the same number      */
 /*           of columns                        */
 /* PARAMS:   mgroup - the MC group to process  */
 /*           numRows - number of rows to       */
 /*                     process                 */
 /* RETCODE:  TRUE/FALSE                        */
 /***********************************************/

 NABoolean checkAllColsHaveSameNumOfRows(HSColGroupStruct *mgroup, Int32 &numRows)
 {
    numRows = -1;

    Int32 prevNumRows = -1;
    NABoolean goodRowCount = TRUE;

    HSGlobalsClass *hs_globals = GetHSContext();

    HSColumnStruct   *col;
    HSColGroupStruct *sgroup;

    Lng32 colCount = mgroup->colCount;

    for (Lng32 x=0; x < colCount; x++)
    {
       col = &mgroup->colSet[x];
       sgroup = hs_globals->findGroup(col->colnum);
       numRows = sgroup->mcis_rowsRead;

       if (x==0)
       {
         prevNumRows = numRows;
       }
       else if (numRows != prevNumRows)
       {
          goodRowCount = FALSE;
          break;
       }
    }

    return goodRowCount;
 }

 /***********************************************/
 /* METHOD:   getCommonCols                     */
 /* PURPOSE:  Find the common bits/columns      */
 /*           between two bitmaps               */
 /* PARAMS:   map1 - bitmap1                    */
 /*           map2 - bitmap2                    */
 /* RETCODE:  number of common bits/columns     */
 /***********************************************/

 Int32 getCommonCols (const NABitVector &map1, const NABitVector &map2)
 {
   NABitVector common_bits(map1);

   common_bits.intersectSet(map2);

   return (Int32) common_bits.entries();
 }

 /***********************************************/
 /* METHOD:   getMissingCols                    */
 /* PURPOSE:  Find the missing bits/columns     */
 /*           between two bitmaps               */
 /* PARAMS:   map1 - bitmap1                    */
 /*           map2 - bitmap2                    */
 /*           missing_bits - bitmap3 that       */
 /*             keeps track of missing bits     */
 /* RETCODE:  number of bits/columns in map2    */
 /*           that are not in map1 and not      */
 /*           already in missing_bits           */
 /***********************************************/

 Int32 getMissingCols (const NABitVector &map1, const NABitVector &map2, NABitVector *missing_bits)
 {
   NABitVector new_missing_bits(map2);

   // bits in map2 that are not in map1
   new_missing_bits.subtractSet(map1);

   CollIndex initial = missing_bits->entries();

   missing_bits->addSet(new_missing_bits);

   return (Int32) (missing_bits->entries() - initial);
 }

 // If an HBase table is very large, we risk time-outs because the
 // sample scan doesn't return rows fast enough. In this case, we
 // want to reduce the HBase row cache size to a smaller number to
 // force more frequent returns. Experience shows that a value of
 // '10' worked well with a 17.7 billion row table with 128 regions
 // on six nodes (one million row sample). We'll assume a workable
 // HBase cache size value scales linearly with the sampling ratio.
 // That is, we'll assume the model:
 //
 //   workable value = (sample row count / actual row count) * c,
 //   where c is chosen so that we get 10 when the sample row count
 //   is 1,000,000 and the actual row count is 17.7 billion.
 //
 //   Solving for c, we get c = 10 * (17.7 billion/1 million).
 //
 // Note that the Generator does a similar calculation in
 // Generator::setHBaseNumCacheRows. The calculation here is more
 // conservative because we care more about getting UPDATE STATISTICS
 // done without a timeout, trading off possible speed improvements
 // by using a smaller cache size.
 //
 // Another issue is that it's been observed that time-outs also
 // depend on system load. Through experimentation we've discovered
 // that a value of 50 works well in loaded scenarios, assuming the
 // table is not too large. So, we use a maximum of the workable
 // value computed above and 50.
 //
 // Another note: If the user has already set HBASE_NUM_CACHE_ROWS_MAX,
 // then we don't do anything here. We respect the user's choices
 // instead.
 //
 // We had hoped that HBase 1.1, with its heartbeat protocol, would
 // solve this time-out problem for good. But early testing seems
 // to suggest that this is not the case.
 //
 // Input:
 // sampleRatio -- Percentage of rows being sampled.
 //
 // Return:
 // TRUE if the CQDs were altered, FALSE otherwise. The caller should use this
 // information to reset the CQDs following execution of the sample query to
 // avoid a performance penalty for subsequent queries (notably those that
 // read the sample table).
 NABoolean HSGlobalsClass::setHBaseCacheSize(double sampleRatio)
 {
   double calibrationFactor = 10 * (17700000000/1000000);
   Int64 workableCacheSize = (Int64)(sampleRatio * calibrationFactor);
   if (workableCacheSize < 1)
     workableCacheSize = 1;  // can't go below 1 unfortunately
   else if (workableCacheSize > 50)
     workableCacheSize = 50;

   // Do this only if the user didn't set the CQD in this session
   // (So, for example, if the CQD was set in the DEFAULTS table
   // but not in this session, we'll still override it.)
   NADefaults &defs = ActiveSchemaDB()->getDefaults();
   if (defs.getProvenance(HBASE_NUM_CACHE_ROWS_MAX) <
       NADefaults::SET_BY_CQD)
     {
       char temp1[40];  // way more space than needed, but it's safe
       Lng32 wcs = (Lng32)workableCacheSize;
       sprintf(temp1,"'%d'",wcs);
       NAString minCQD = "CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN ";
       minCQD += temp1;
       HSFuncExecQuery(minCQD);
       NAString maxCQD = "CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX ";
       maxCQD += temp1;
       HSFuncExecQuery(maxCQD);
       hbaseCacheSizeCQDsSet_ = TRUE;
       return TRUE;
     }
   else
     return FALSE;
 }


 // If a Hive table has very long character columns, we might get
 // a SQL error 8446 when scanning it. The way around that is to
 // set CQD HIVE_MAX_STRING_LENGTH_IN_BYTES to the longest string
 // length in the table. This is typically done by the user in
 // the sqlci or trafci session. However we need to propagate
 // it to our child tdm_arkcmp. This method does that.
 NABoolean HSGlobalsClass::setHiveMaxStringLengthInBytes(void)
 {
   NABoolean rc = FALSE;
   if (isHiveTable)
     {
       NADefaults &defs = ActiveSchemaDB()->getDefaults();
       if (defs.getProvenance(HIVE_MAX_STRING_LENGTH_IN_BYTES) >=
           NADefaults::SET_BY_CQD)
         {
           char temp1[40];  // way more space than needed, but it's safe
           UInt32 hiveMaxStringLengthInBytes =
             ActiveSchemaDB()->getDefaults().getAsULong(HIVE_MAX_STRING_LENGTH_IN_BYTES);

           sprintf(temp1,"'%u'",hiveMaxStringLengthInBytes);
           NAString theCQD = "CONTROL QUERY DEFAULT HIVE_MAX_STRING_LENGTH_IN_BYTES ";
           theCQD += temp1;
           HSFuncExecQuery(theCQD);

           hiveMaxStringLengthCQDSet_ = TRUE;
           rc = TRUE;
         }
     }

   return rc;
 }


 // If we set any CQDs in CollectStatistics that need to be
 // reset when we are done, do that here.
 void HSGlobalsClass::resetCQDs(void)
 {
   if (hbaseCacheSizeCQDsSet_)
     {
       HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MIN RESET");
       HSFuncExecQuery("CONTROL QUERY DEFAULT HBASE_NUM_CACHE_ROWS_MAX RESET");
       hbaseCacheSizeCQDsSet_ = FALSE;
     }
   if (hiveMaxStringLengthCQDSet_)
     {
       HSFuncExecQuery("CONTROL QUERY DEFAULT HIVE_MAX_STRING_LENGTH_IN_BYTES RESET");
       hiveMaxStringLengthCQDSet_ = FALSE;
     }
 }


 // rearrange the MCs so that the larger groups are listed first
 // and the ones that will not be processed (not enough memory) are
 // listed last, so to simplify the rest of the ordering algorithm
 void HSGlobalsClass::reArrangeMCGroups()
 {
    Int32 multiGroupCount = groupCount - singleGroupCount;
    HSColGroupStruct* s_m_group[multiGroupCount];

    HSColGroupStruct* mgroup = multiGroup;

    for (int i=0; i <multiGroupCount; i++)
    {
       if (i==0)
          s_m_group[i] = mgroup;
       else
       {
          int j=0;
          while((j<i) &&
                ((mgroup->colCount < s_m_group[j]->colCount) || (mgroup->state != UNPROCESSED)))
            j++;

          int k=i;
          while (k>j)
          {
             s_m_group[k] = s_m_group[k-1];
             k--;
          }
          s_m_group[k] = mgroup;
       }
       mgroup = mgroup->next;
    }

    s_m_group[0]->prev = NULL;
    s_m_group[multiGroupCount-1]->next = NULL;
    for (int i=0; i <multiGroupCount-1; i++)
    {
       s_m_group[i]->next = s_m_group[i+1];
       s_m_group[i+1]->prev = s_m_group[i];
    }

    multiGroup = s_m_group[0];
 }

 /***********************************************/
 /* METHOD:  orderMCGroupsNeeded                */
 /* PURPOSE: check if we need to re-order       */
 /*          multi-column and single-column     */
 /*          groups to maximize the number      */
 /*          of multi-column stats that can     */
 /*          be done in memory                  */
 /* PARAMS:  none                               */
 /* RETCODE:  1 - Memory is not enough to       */
 /*               process all SC and MC         */
 /*               together and at least one MC  */
 /*               column can be processed in    */
 /*               memory                        */
 /*          -0 - otherwise                     */
 /* ASSUMPTIONS: memory requirements for single */
 /*              and multi-column groups have   */
 /*              already been determined        */
 /***********************************************/

 NABoolean HSGlobalsClass::orderMCGroupsNeeded()
 {
   // ordering is not necessary if:
   //   multiGroup is empty
   //   all SC and MC fit in memory together
   //   all MC don't fit in memory
   // return FALSE is any of the above is TRUE

   HSLogMan *LM = HSLogMan::Instance();
   Int64 memNeededForAllCols = 0;
   Int64 memAllowed = getMaxMemory();
   HSColGroupStruct* sgroup = singleGroup;
   HSColGroupStruct* mgroup = multiGroup;

   // no MC groups to process
   if (mgroup == NULL)
   {
     if (LM->LogNeeded())
     {
        sprintf(LM->msg, "MC: NO rearranging of sgroup for IS: mgroup is empty");
        LM->Log(LM->msg);
     }

     return FALSE;
   }

   // memory needed by all single-column groups
   while (sgroup)
   {
      memNeededForAllCols += sgroup->memNeeded;
      sgroup = sgroup->next;
   }

   // memory needed by all multi-column groups
   while (mgroup)
   {
      memNeededForAllCols += mgroup->memNeeded;
      mgroup = mgroup->next;
   }

   // all columns and MCs can fit in memory together, no need to rearrange them
   if (memNeededForAllCols < memAllowed)
   {
     if (LM->LogNeeded())
     {
        sprintf(LM->msg, "MC: NO rearranging of sgroup for IS: there is enough memory to compute all MC groups");
        LM->Log(LM->msg);
     }

      return FALSE;
   }

   // we have multi-column groups and memory is not enough to process all single and
   // multi-column groups together, we need to check if any of the multi-column
   // groups can be processed in memory

   sgroup = singleGroup;
   mgroup = multiGroup;

   HSColumnStruct* col;
   NABoolean atLeastOne = FALSE;
   while (mgroup)
   {
      // don't try this multi-column group if its memory
      // requirement cannot be met
      if ((mgroup->state == UNPROCESSED) &&
          (mgroup->mcis_totalMCmemNeeded >= memAllowed))
      {
         mgroup->state = DONT_TRY;
         mgroup->memNeeded = 0;
         mgroup->mcis_totalMCmemNeeded = 0;
         for (Int32 i=0; i< mgroup->colCount; i++)
         {
            col = &mgroup->colSet[i];
            sgroup = findGroup(col->colnum);
            sgroup->mcs_usingme--;
         }

         if (LM->LogNeeded())
         {
            sprintf(LM->msg, "MC: NO memory available to compute MC (%s) using IS", mgroup->colNames->data());
            LM->Log(LM->msg);
         }
      }
      else if (mgroup->state == UNPROCESSED)
        atLeastOne = TRUE;

      mgroup = mgroup->next;
   }

   // all MCs are too large to fit in memory
   if (!atLeastOne)
   {
     if (LM->LogNeeded())
     {
        sprintf(LM->msg, "MC: NO rearranging of sgroup for IS: "
                         "either all MC groups are already processed or too large for memory");
        LM->Log(LM->msg);
     }

     return FALSE;
   }

   // we at least have one multi-column group that can be processed in memory
   if (LM->LogNeeded())
   {
     sprintf(LM->msg, "\tMC: proceeding with rearranging sgroup for IS: memory available for at"
                      " least one MC but not enough to process all eligible MCs together");
     LM->Log(LM->msg);
   }

   // rearrange the MCs so that the larger groups are listed first
   // and the ones that will not be processed (not enough memory) are
   // listed last, so to simplify the rest of the ordering algorithm
   reArrangeMCGroups();

   return TRUE;
 }


 // helper function to orderMCGroups
 // set the number of columns that are only used by this MC
 // this method also initializes mcis_groupHead
 void HSGlobalsClass::computeSingleUsedCols()
 {
    HSLogMan *LM = HSLogMan::Instance();
    HSColGroupStruct* mgroup = multiGroup;
    while (mgroup != NULL)
    {
        if (mgroup->state != UNPROCESSED)
        {
           mgroup = mgroup->next;
           continue;
        }

        // initialize all candidate groups as group heads
        mgroup->mcis_groupHead = TRUE;
        mgroup->mcis_groupWeight.w = 0;

        HSColGroupStruct *sgroup;
        HSColumnStruct* col;
        for (Int32 i=0; i<mgroup->colCount; i++)
        {
           col = &mgroup->colSet[i];
           sgroup = findGroup(col->colnum);
           if (sgroup->mcs_usingme == 1)
             mgroup->mcis_groupWeight.w++;
        }

        mgroup = mgroup->next;
    }
 }

 // helper function to orderMCGroups
 // this method computes the wieght (u,v) of an
 // MC. See below for definition of the weight
 void HSGlobalsClass::computeMCGroupsWeight()
 {
    HSColGroupStruct* m_group = multiGroup;
    HSColGroupStruct* remaining_mgroup = NULL;

    // compute the weight of every multi-column group
    // except for the ones that cannot fit into memory
    // or the ones that are already processed
    // weight of multi-group MCi is a vector (u,v) where:
    //     u: sum of multi-columns MCj that have common columns
    //        with MCi
    //     v: sum of |MCi| - |interset(MCi,MCj)| for all MCj
    //        that have common columns with MCi. A column
    //        is not counted twice
    //     (u1,v1) > (u2,v2) if:
    //     u1 > u2 or
    //     u1==u2 and v1 < v2

    while (m_group && m_group->next)
    {
       Int32 i = 0;
       Int32 commonCols = 0;
       if ((m_group->state == DONT_TRY) || (m_group->state == PROCESSED))
       {
          m_group->mcis_groupWeight.clear();
          m_group = m_group->next;
          continue;
       }

       remaining_mgroup = m_group->next;
       while (remaining_mgroup)
       {
          if ((remaining_mgroup->state != DONT_TRY) &&
              (remaining_mgroup->state != PROCESSED) &&
              (commonCols = getCommonCols (*(m_group->mcis_colsUsedMap), *(remaining_mgroup->mcis_colsUsedMap))) > 0 )
          {
             m_group->mcis_groupWeight.u++;

             remaining_mgroup->mcis_groupWeight.u++;

             m_group->mcis_groupWeight.v +=
                      getMissingCols (*(m_group->mcis_colsUsedMap),
                                      *(remaining_mgroup->mcis_colsUsedMap),
                                      m_group->mcis_colsMissingMap);
             remaining_mgroup->mcis_groupWeight.v +=
                      getMissingCols (*(remaining_mgroup->mcis_colsUsedMap),
                                      *(m_group->mcis_colsUsedMap),
                                      remaining_mgroup->mcis_colsMissingMap);

             // do this here to avoid another traversal
             // A group HEAD is a group that has common columns
             // (or no common columns at all) with other groups
             // and has the highest weight
             if (remaining_mgroup->mcis_groupWeight <= m_group->mcis_groupWeight)
             {
                 remaining_mgroup->mcis_groupHead = FALSE;
             }

          }
          remaining_mgroup = remaining_mgroup->next;
       }

       m_group = m_group->next;
    }
 }

 // helper function to orderMCGroups
 // order the MC groups in an assending order
 // by their weight
 void HSGlobalsClass::reorderMCGroupsByWeight()
 {
    Int32 multiGroupCount = groupCount - singleGroupCount;
    HSColGroupStruct* s_m_group[multiGroupCount];

    HSColGroupStruct* mgroup = multiGroup;

    for (int i=0; i <multiGroupCount; i++)
    {
       if (i==0)
          s_m_group[i] = mgroup;
       else
       {
          int j=0;
          while((j<i) && (mgroup->mcis_groupWeight < s_m_group[j]->mcis_groupWeight))
            j++;

          int k=i;
          while (k>j)
          {
             s_m_group[k] = s_m_group[k-1];
             k--;
          }
          s_m_group[k] = mgroup;
       }
       mgroup = mgroup->next;
    }

    s_m_group[0]->prev = NULL;
    s_m_group[multiGroupCount-1]->next = NULL;
    for (int i=0; i <multiGroupCount-1; i++)
    {
       s_m_group[i]->next = s_m_group[i+1];
       s_m_group[i+1]->prev = s_m_group[i];
    }

    multiGroup = s_m_group[0];
 }

 // helper function to orderMCGroups
 // form group sets and connect group heads to their neighbors
 // using the mcis_next pointers
 void HSGlobalsClass::formGroupSets()
 {
    HSLogMan *LM = HSLogMan::Instance();

    // get the list of neighbors to the group with the highest weight
    HSColGroupStruct* mgroup_set = multiGroup;
    HSColGroupStruct* mgroup =  NULL;

    while (mgroup_set)
    {
       if (mgroup_set->state == DONT_TRY)
       {
          if (LM->LogNeeded())
          {
             sprintf(LM->msg, "\tMC: GROUP (%s) has state DONT_TRY, is skipped", mgroup_set->colNames->data());
             LM->Log(LM->msg);
          }
       }
       else if ((mgroup_set->mcis_groupHead) && (mgroup_set->state != PROCESSED))
       {
          mgroup_set->mcis_next = mgroup_set;

          if (LM->LogNeeded())
          {
             sprintf(LM->msg, "\tMC: GROUP (%s) is a HEAD GROUP ", mgroup_set->colNames->data());
             LM->Log(LM->msg);
          }

          mgroup =  multiGroup;

          Int32 i = 0;
          while (mgroup != NULL)
          {
             if((!mgroup->mcis_groupHead) &&
                (mgroup->state == UNPROCESSED) &&
                getCommonCols (*(mgroup_set->mcis_colsUsedMap), *(mgroup->mcis_colsUsedMap)))
             {

                if (mgroup->mcis_next == NULL)
                {
                    HSColGroupStruct *myNextNeighbpr = mgroup_set;
                    if (myNextNeighbpr->mcis_next == NULL)
                    {
                        myNextNeighbpr->mcis_next = mgroup;
                        mgroup->mcis_next = mgroup_set;
                    }
                    else
                    {
                       while ((myNextNeighbpr->mcis_next != mgroup_set) &&
                              (mgroup->mcis_groupWeight.w < myNextNeighbpr->mcis_next->mcis_groupWeight.w))
                           myNextNeighbpr = myNextNeighbpr->mcis_next;

                        mgroup->mcis_next = myNextNeighbpr->mcis_next;
                        myNextNeighbpr->mcis_next = mgroup;
                    }
                }
             }

             mgroup = mgroup->next;
          }

          if (LM->LogNeeded())
          {
             HSColGroupStruct *myNextNeighbpr = mgroup_set;

             while(myNextNeighbpr->mcis_next && (myNextNeighbpr->mcis_next != mgroup_set))
             {
               myNextNeighbpr = myNextNeighbpr->mcis_next;

               sprintf(LM->msg, "\tMC: GROUP (%s) is neighbor of HEAD GROUP (%s)",
                       myNextNeighbpr->colNames->data(), mgroup_set->colNames->data());
               LM->Log(LM->msg);
             }

             if (mgroup_set->mcis_groupWeight.isNull())
             {
                sprintf(LM->msg, "\tMC: GROUP (%s) has no neighbors",
                        mgroup_set->colNames->data());
                LM->Log(LM->msg);
             }
          }
       }

       mgroup_set = mgroup_set->next;
    }
 }

 // helper function to orderMCGroups
 // reorder the single groups used by the multi-groups by listing the columns
 // used by the multi-groups with highest weight first
 // this is done so to not alter the IS logic that schedules the processing of
 // single column groups
 // this method proceeds as follows:
 //   1- identify the SC that are used by a head group and its neighbors
 //   2- order the SC so that the columns used by the head group are listed first
 void HSGlobalsClass::reorderSingleGroupsByWeight (HSColGroupStruct* s_group_back[],
                                                   Int32 colsOrder[], Int32 &headGroupCols)
 {
    HSLogMan *LM = HSLogMan::Instance();

    HSColGroupStruct* mgroup_set = multiGroup;
    HSColGroupStruct* sgroup = NULL;
    HSColGroupStruct* s_m_group[singleGroupCount];

    Int32 i = 0;
    NABitVector seenCols;
    while (mgroup_set != NULL)
    {
        if ((mgroup_set->state != PROCESSED) &&
            (mgroup_set->mcis_groupHead) &&
            (mgroup_set->mcis_next != NULL))
        {
           HSColGroupStruct* start = mgroup_set;

           if (start->mcis_groupWeight.w < start->mcis_next->mcis_groupWeight.w)
              start = start->mcis_next;

           HSColGroupStruct* end = start;

           while(start->mcis_next && (start->mcis_next != end))
           {
             NABitVector new_cols(*start->mcis_colsUsedMap);

             new_cols.subtractSet(seenCols);

             for (CollIndex j=0; new_cols.nextUsed(j); j++)
               colsOrder[i++] = j;

             seenCols.addSet(*start->mcis_colsUsedMap);
             start = start->mcis_next;
           }

        }
        mgroup_set = mgroup_set->next;
    }

    headGroupCols = i;
    // fill in the remaining single columns that are not used by any
    // multi-group
    for (CollIndex j=0; j<singleGroupCount; j++)
      if (!seenCols.testBit(j))
         colsOrder[i++] = j;

    if (LM->LogNeeded())
    {
       sprintf(LM->msg, "\tMC: order of single group columns BEFORE reorder");
       LM->Log(LM->msg);

       // get current single columns order
       sgroup = singleGroup;
       while (sgroup != NULL)
       {
         sprintf(LM->msg, "\tMC: SINGLE has mcs_usingme (%d) col: (%s)",
                 sgroup->mcs_usingme, sgroup->colSet[0].colname->data());
         LM->Log(LM->msg);
         sgroup = sgroup->next;
       }
    }

    // do the actual reordering and rearrange the single groups list
    i = 0;
    while (i < singleGroupCount)
    {
       s_m_group[i] = s_group_back[colsOrder[i]];
       if (i == 0)
         s_m_group[i]->prev = NULL;
       else
         s_m_group[i-1]->next = s_m_group[i];
       i++;
    }

    s_m_group[singleGroupCount-1]->next = NULL;

    singleGroup = s_m_group[0];

    if (LM->LogNeeded())
    {
       sprintf(LM->msg, "\tMC: order of single columns groups AFTER reorder");
       LM->Log(LM->msg);

       // get current single columns order
       sgroup = singleGroup;
       while (sgroup != NULL)
       {
         sprintf(LM->msg, "\tMC: SINGLE has mcs_usingme (%d) col: (%s)",
                 sgroup->mcs_usingme, sgroup->colSet[0].colname->data());
         LM->Log(LM->msg);
         sgroup = sgroup->next;
       }
    }
 }

 /***********************************************/
 /* METHOD:  freeMCISmemory                     */
 /* PURPOSE: helper function to orderMCGroups   */
 /*          free up (remove from memory) any   */
 /*          columns that are not being used    */
 /*          by the candidate multi-group       */
 /*          (the one with the highest weight)  */
 /* PARAMS:  s_group_back: array of pointers    */
 /*               to the initial list           */
 /*               (order) of SC                 */
 /*          colsOrder: array of indexes to     */
 /*               the s_group_back              */
 /*          headGroupCol: number of columns in */
 /*               head MC and its neighbors     */
 /* RETCODE: none                               */
 /* ASSUMPTIONS: none                           */
 /***********************************************/

 void HSGlobalsClass::freeMCISmemory(HSColGroupStruct* s_group_back[],
                                     Int32 colsOrder[], Int32 &headGroupCols)
 {
    HSLogMan *LM = HSLogMan::Instance();
    HSColGroupStruct* sgroup = singleGroup;
    while (sgroup != NULL)
    {
       // column still in memory but will not be used
       // by head group or its neighbors
       if ((sgroup->state == PROCESSED) && !sgroup->mcis_memFreed)
       {
          Int32 i = 0;
          while (i < headGroupCols)
          {
             if (s_group_back[colsOrder[i]] == sgroup)
               break;
             i++;
          }

          // group was not found in the list of used cols
          if (i >= headGroupCols)
          {
             sgroup->freeISMemory();
             sgroup->mcis_readAsIs = TRUE;
             sgroup->state = UNPROCESSED;

             if (LM->LogNeeded())
             {
                sprintf(LM->msg, "MC: col: (%s) memory is released using freeMCISmemory",
                                  sgroup->colSet[0].colname->data());
                LM->Log(LM->msg);
             }
          }
       }

       sgroup = sgroup->next;
    }
 }

 /***********************************************/
 /* METHOD:  orderMCGroups                      */
 /* PURPOSE: re-order multi-column and single   */
 /*          column groups to maximize the      */
 /*          number of multi-column stats that  */
 /*          can be done in memory              */
 /* PARAMS:  s_group_back: array of pointers    */
 /*          to the initial list (order) of SC  */
 /* RETCODE: none                               */
 /* ASSUMPTIONS: memory requirements for single */
 /*              and multi-column groups have   */
 /*              already been determined and    */
 /*              multi-columns that cannot be   */
 /*              processed in memory has        */
 /*              already been identified and    */
 /*              flagged                        */
 /***********************************************/

 void HSGlobalsClass::orderMCGroups(HSColGroupStruct* s_group_back[])
 {
    // check if reorder of multi/single groups is needed
    if (!orderMCGroupsNeeded())
       return;

      /*================================================================
         The algorithm to identify the order of processing for all MCs.
         Let MC1, MC2, MC3, . MCn denote n MCs to build.

         0- identify all MC group that cannot be processed in
            memory (flagged as PROCESSED or DONT_TRY)
         1- For every remaining eligible MCi
 	   a- Let MCj denote another MC where MCi and MCj have
               common single columns
 	   b- For each such MCj, compute the number of columns to
               read into memory after MCi is processed.
               This number is |MCj| - |overlap(MCi,MCj)|.
 	   c- Compute a weight vector (u,v),
               u= # of MCjs that can be processed after MCi,
               v= total # of columns in these MCj to read.
 	2- Denote MCk as the MCi with the largest u and smallest v
 	   a- Remove columns from memory that are not needed to build
               MCk and its neighbors.
 	   b-Reading in columns necessary to built MCk, build MCk
 	   c-For each MCj connected to MCk, reading in columns necessary
              to build MCj, build MCj
 	3- Go back to step 1
      =================================================================*/

    HSLogMan *LM = HSLogMan::Instance();

    LM->StartTimer("MC: orderMCGroups");

    // compute how many columns of each MC that are only used by that MC
    computeSingleUsedCols();

    // compute the weight (u,v) of each MC
    computeMCGroupsWeight();

    // reorder the MC groups to get the MC with the largest weight (u,v)
    reorderMCGroupsByWeight();

    // connect group heads to their neighbors
    formGroupSets();

    // arary of indexes to the s_group_back
    Int32 colsOrder[singleGroupCount];
    // number of columns in the head MC and its neighbors
    Int32 headGroupCols = 0;

    // reorder the single column groups so that all columns used by
    // the head MC (MC with the largest weight) are listed first, followed
    // by the columns used by the neighbors MCs
    reorderSingleGroupsByWeight(s_group_back, colsOrder, headGroupCols);

    // free up (remove from memory) any columns that are not being used
    // by the candidate multi-group (the one with the highest weight)
    // or its neighbors
    freeMCISmemory(s_group_back, colsOrder, headGroupCols);

    LM->StopTimer();
 }

 void MCWrapper::fail(const char* opName, Lng32 line)
 {
   HSLogMan *LM = HSLogMan::Instance();
   LM->Log("INTERNAL ERROR (MCWrapper):");
   sprintf(LM->msg, "Undefined operator type %s", opName);
   LM->Log(LM->msg);
   *CmpCommon::diags() << DgSqlCode(-UERR_GENERIC_ERROR)
                       << DgString0("MCWrapper")
                       << DgString1("N/A")
                       << DgString2(LM->msg);
   throw CmpInternalException("failure in MCWrapper",
                              __FILE__, line);
 }


 void ISVarChar::fail(const char* opName, Lng32 line)
 {
   HSLogMan *LM = HSLogMan::Instance();
   LM->Log("INTERNAL ERROR (ISVarChar):");
   sprintf(LM->msg, "Undefined operator type %s", opName);
   LM->Log(LM->msg);
   *CmpCommon::diags() << DgSqlCode(-UERR_GENERIC_ERROR)
                       << DgString0("ISVarChar")
                       << DgString1("N/A")
                       << DgString2(LM->msg);
   throw CmpInternalException("failure in ISVarChar",
                              __FILE__, line);
 }

 // Compare this object to rhs, returning negative value if less, 0 if equal,
 // and positive value if greater.
 Int32 ISVarChar::compare(const ISVarChar &rhs)
 {
   Int32 result;
   Int16 lhsLen = *(short*)content;
   Int16 rhsLen = *(short*)rhs.content;
   Int16 minLen = MINOF(lhsLen, rhsLen);
   Int16 diffLen;
   char* diffPtr;

   // Note that case insensitive is not supported with non-binary collation.
   if (CollationInfo::isSystemCollation(colCollation))
       return Collated_cmp(content+VARCHAR_LEN_FIELD_IN_BYTES,
                           rhs.content+VARCHAR_LEN_FIELD_IN_BYTES,
                           MAXOF(*((short*)content), *((short*)rhs.content)),
                           colCollation, sortBuffer1, sortBuffer2);

   // UCS2 cols not supported in MODE_SPECIAL_1 and do not support case insensitivity.
   if (!caseInsensitive) {
     if (charset != CharInfo::UNICODE)
     {
       result = memcmp(content+VARCHAR_LEN_FIELD_IN_BYTES,
                     rhs.content+VARCHAR_LEN_FIELD_IN_BYTES,
                     minLen);
       if (result != 0 || lhsLen == rhsLen)
         return result;
       else
       {
         if (minLen == lhsLen)
         {
           diffPtr = rhs.content + VARCHAR_LEN_FIELD_IN_BYTES + minLen;
           diffLen = rhsLen - minLen;
         }
         else
         {
           diffPtr = content + VARCHAR_LEN_FIELD_IN_BYTES + minLen;
           diffLen = lhsLen - minLen;
         }
         for (int i = 0; i < diffLen; i++)
         {
           if (*diffPtr++ != ' ')
             return (minLen == lhsLen ? -1 : 1);
         }
         return 0;
       }
     }
     else
       return compareWcharWithBlankPadding((const wchar_t*)(content+VARCHAR_LEN_FIELD_IN_BYTES),
                           *((short*)content) / sizeof(NAWchar),
                           (const wchar_t*)(rhs.content+VARCHAR_LEN_FIELD_IN_BYTES),
                           *((short*)rhs.content) / sizeof(NAWchar));
   }
   else
     return hs_strncasecmp(content+VARCHAR_LEN_FIELD_IN_BYTES,
                           rhs.content+VARCHAR_LEN_FIELD_IN_BYTES,
                           MAXOF(*((short*)content), *((short*)rhs.content)));
 }

 Int32 ISVarChar::operator==(const ISVarChar &rhs)
 {
   return !compare(rhs);  // returns 1 if equal, 0 if not
 }

 void IUSFixedChar::operator=(const HSDataBuffer& buff)
 {
   Int16 bytesPerChar = (charset == CharInfo::UNICODE ? sizeof(NAWchar) : 1);
   content = new (STMTHEAP) char[length * bytesPerChar];
   size_t buffLenBytes = (size_t)buff.length();
   const char* buffData = buff.data();

   // MFV for interval 0 is always empty without enclosing quotes, so we have to
   // check for that before removing quotes.
   if (buffLenBytes > 0)
     {
       buffLenBytes -= (2 * sizeof(NAWchar));
       buffData += sizeof(NAWchar);
     }

   if (charset == CharInfo::UNICODE)
     {
       memmove(content, buffData, buffLenBytes);
       for (NAWchar* p=(NAWchar*)(content+buffLenBytes);
             p<((NAWchar*)content)+length;
             p++)
         *p = L' ';
     }
   else
     {
       na_wcstombs(content, (const NAWchar*)buffData, buffLenBytes);
       for (char* p=content+(buffLenBytes/sizeof(NAWchar)); p<content+length; p++)
         *p = ' ';
     }
 }


 void IUSVarChar::operator=(const HSDataBuffer& buff)
 {
   size_t sizeFieldBytes = sizeof(Int16);
   size_t buffLenBytes = (size_t)buff.length();
   const char* buffData = buff.data();

   // MFV for interval 0 is always empty without enclosing quotes, so we have to
   // check for that before removing quotes.
   if (buffLenBytes > 0)
     {
       buffLenBytes -= (2 * sizeof(NAWchar));
       buffData += sizeof(NAWchar);
     }

   size_t destBytes = (charset == CharInfo::UNICODE
                        ? buffLenBytes
                        : buffLenBytes / sizeof(NAWchar));
   content = new (STMTHEAP) char[sizeFieldBytes + destBytes];
   *(Int16*)content = destBytes;
   if (charset == CharInfo::UNICODE)
     memmove(content+sizeFieldBytes, buffData, buffLenBytes);
   else
     na_wcstombs(content+sizeFieldBytes, (const NAWchar*)buffData, destBytes);
 }

 #ifdef _TEST_ALLOC_FAILURE
 Int32 HSColGroupStruct::allocCount = 1;
 #endif

 // -----------------------------------------------------------------------
 // Constructor and destructor.
 // -----------------------------------------------------------------------
 HSColGroupStruct::HSColGroupStruct()
       : colSet(STMTHEAP), colCount(0), clistr(new(STMTHEAP) NAString(STMTHEAP)),
         oldHistid(0), newHistid(0), colNames(new(STMTHEAP) NAString(STMTHEAP)),
         groupHist(NULL), next(NULL), prev(NULL), state(UNPROCESSED),
         memNeeded(0), strMemAllocated(0),
         data(NULL), nextData(NULL), strData(NULL), strNextData(NULL),
         strDataConsecutive(TRUE),  // only becomes false if data sets merged for IUS
         varcharFetchBuffer(NULL),
         mcis_data(NULL), mcis_nextData(NULL), mcs_usingme(0), //for MC
         nullIndics(NULL), nullCount(0), mcis_rowsRead(0),
         eligibleForVarCharCompaction(FALSE),
         ISdatatype(-1), ISlength(-1), ISvcLenUsed(-1), ISprecision(-1), ISscale(-1),
         ISSelectExpn(STMTHEAP), prevRowCount(0), prevUEC(0),
         reason(HS_REASON_UNKNOWN), newReason(HS_REASON_MANUAL),
         colSecs(0), coeffOfVar(0), oldAvgVarCharSize(-1), rowsRead(0), sumSize(0),
         avgVarCharSize(-1), skewedValuesCollected(FALSE),
         mcis_nullIndBitMap(NULL), mcis_colsUsedMap(NULL),
         mcis_colsMissingMap(NULL), mcis_memFreed(FALSE),
         mcis_totalMCmemNeeded(0), mcis_groupHead(TRUE), mcis_next(NULL), mcis_readAsIs (FALSE),
         delayedRead(FALSE), cbf(NULL),
         boundaryValues(NULL), MFVValues(NULL), allKeysInsertedIntoCBF(FALSE),
         backwardWarningCount(0)
   {
     strcpy(readTime, "0001-01-01 00:00:00");  // default if new
 #ifdef _TEST_ALLOC_FAILURE
     initFilter();
 #endif
   }

 HSColGroupStruct::~HSColGroupStruct()
   {
     delete clistr;
     delete colNames;
     delete groupHist;
     delete next;
     freeISMemory();
   }

 /**
  * Sets the length of the IS type of the column, and the estimated average
  * length if the mapped type is varchar and compacted varchars are in use.
  *
  * @param len Natural length of the column type as represented for IS.
  * @param maxCharColumnLengthInBytes Maximum length character string limit
  *    imposed by UPDATE STATS
  */
 void HSColGroupStruct::setISlength(Lng32 len, Lng32 maxCharColumnLengthInBytes)
 {
   ISlength = MINOF(len, maxCharColumnLengthInBytes);
   if (!DFS2REC::isAnyVarChar(ISdatatype))
     return;

   if (eligibleForVarCharCompaction)
   {
     // If average varchar size is known from older histograms
     // use that; otherwise use a rule of thumb estimate.

     if (oldAvgVarCharSize >= 1)
       ISvcLenUsed = oldAvgVarCharSize + 4;  // + 4 to allow a little growth
     else
     {
       // In the absence of older histograms, assume the average
       // length is about one half the maximum length. (After all,
       // the user presumably chose varchar to save some space.)
       // Note: This code path can only be taken on the first call
       // to this method. Later calls happen only when we overran
       // buffer space, but in that case oldAvgVarCharSize will
       // have been calculated.
       double ruleOfThumbEstimate = len/2;
       if (ruleOfThumbEstimate < 4)
         ruleOfThumbEstimate = 4;
       ISvcLenUsed = ruleOfThumbEstimate;
     }

     if (ISvcLenUsed > ISlength)
       ISvcLenUsed = ISlength;  // don't allow it to exceed the actual size!

     HSLogMan *LM = HSLogMan::Instance();
     if (LM->LogNeeded())
     {
       sprintf(LM->msg, "Considering compaction on varchar column %s:", colNames->data());
       LM->Log(LM->msg);
       sprintf(LM->msg, "    Declared len: %d, estimated avg len: %d", ISlength, ISvcLenUsed);
       LM->Log(LM->msg);
       sprintf(LM->msg, "    Compaction%schosen", ISvcLenUsed == len ? " not " : " ");
       LM->Log(LM->msg);
     }
   }
   else
     ISvcLenUsed = ISlength;
 }

 /**
  * Determines the number of bytes to allocate for strData, the buffer holding
  * all the data for a char/varchar column with internal sort. The overall memory
  * requirement for the column has already been calculated. Here, we just need to
  * determine how much of it is for the data buffer. The other parts are the array
  * of objects (ISFixedChar or ISVarChar) that reference the content, and for
  * a compacted varchar, the buffer that the uncompacted varchar data is read into.
  *
  * @param rows Number of rows being retrieved to calculate stats on.
  * @return Number of bytes to allocate to hold the char or varchar content.
  */
 size_t HSColGroupStruct::strDataMemNeeded(Int64 rows)
 {
   size_t result = memNeeded;
   HS_ASSERT(DFS2REC::isAnyCharacter(ISdatatype));
   if (DFS2REC::isAnyVarChar(ISdatatype))
   {
     result -= (rows * sizeof(ISVarChar));  // deduct space for ptrs to content
     if (isCompacted())
       result -= (inflatedVarcharContentSize() * MAX_ROWSET);  // deduct pre-compaction fetch buffer
   }
   else
     result -= (rows * sizeof(ISFixedChar));

   return result;
 }


 // Allocates memory necessary for internal sort for the group. If an allocation
 // failure occurs, free any memory already allocated for the current column and
 // exit.
 //
 // Parameters:
 //   rows -- number of rows the allocation is based on.
 //   allocStrData -- allocate strData array as well as data array for char types.
 //                   If false, don't change strData ptr; if non-null, it is
 //                   still in use. This parameter defaults to TRUE.
 //   recalcMemNeeded -- if TRUE, recalculate memNeeded based on the number of rows.
 //                      This happens when mergeDatasetsForIUS() calls this fn to
 //                      allocate for the original number of rows + inserted rows.
 //                      This parameter defaults to FALSE.
 //
 // Return value:
 //   TRUE if all went well, FALSE if a memory allocation request failed.
 //
 NABoolean HSColGroupStruct::allocateISMemory(Int64 rows,
                                              NABoolean allocStrData,
                                              NABoolean recalcMemNeeded)
 {
   HSLogMan *LM = HSLogMan::Instance();
   NAWchar* wptr;
   NABoolean allAllocated = TRUE;

   try
     {
      // Get new value for memNeeded if necessary. This is the case when IUS
      // datasets are being merged and extra room may be needed for the insert
      // dataset. Null indicators are not affected by this.
      if (recalcMemNeeded)
        {
          // Null indicators should already have been allocated.
          HS_ASSERT(nullIndics || !colSet[0].nullflag);
          GetHSContext()->getMemoryRequirementsForOneGroup(this, rows);
        }
      else if (colSet[0].nullflag)
        {
          // Allocate enough null indicators for a single rowset -- they are
          // processed after each rowset read. If it is a NOT NULL column, do
          // not allocate the null indicator space.
          //
          nullIndics = (short*)newBuiltinArr(short, MAX_ROWSET);
          if (!nullIndics)
            throw ISMemAllocException();

          // setup the null indicator bit for MC IS if this column is used
          // by MCs
          if (mcs_usingme > 0)
          {
              if (!mcis_nullIndBitMap)
              {
                 mcis_nullIndBitMap = new (STMTHEAP) NABitVector (STMTHEAP);
                 if (!mcis_nullIndBitMap)
                    throw ISMemAllocException();
              }
          }
        }
      else
        nullIndics = NULL;

      if (DFS2REC::isAnyCharacter(ISdatatype))
        {
          // For all char types, data and nextData will have ptrs to pool of char
          // data read into strData, assigned after each rowset fetch. Blank out
          // entire varchar allocation to allow simple blank-padded comparison
          // of different-length strings.
          //
          // memNeeded includes length for ptrs; subtract it from amount of space
          // to allocate for the strings themselves.
          size_t strMemNeeded = strDataMemNeeded(rows);
          // round up to next multiple of sizeof(short)
          strMemNeeded = sizeof(short) * ( (strMemNeeded + sizeof(short) - 1) / sizeof(short) );
          strMemAllocated = strMemNeeded;  // remember for overrun checking


          if (DFS2REC::isAnyVarChar(ISdatatype))
            {
              if (allocStrData)
                {
                  // Allocate as short to ensure proper alignment for length field.
                  strData = (char*)(newBuiltinArr(short, strMemNeeded / sizeof(short)));
                  if (!strData)
                    throw ISMemAllocException();

                  // Unless varchar values are compacted after being read, blank out the
                  // entire varchar allocation to allow simple blank-padded comparison
                  // of different-length strings.
                  if (isCompacted())
                  {
                    size_t fetchMemNeeded = (inflatedVarcharContentSize() * MAX_ROWSET);
                    fetchMemNeeded = sizeof(short) *
                                     ( (fetchMemNeeded + sizeof(short) - 1) / sizeof(short) );
                    varcharFetchBuffer =
                          (char*)(newBuiltinArr(short, fetchMemNeeded / sizeof(short)));
                    if (!varcharFetchBuffer)
                      throw ISMemAllocException();
                  }
                  else
                  {
                    if (ISdatatype == REC_BYTE_V_DOUBLE)
                      {
                        Int64 uvCharCount = strMemNeeded / sizeof(NAWchar);
                        wptr = (NAWchar*)strData + uvCharCount - 1;
                        while (uvCharCount--)
                          *wptr-- = L' ';
                      }
                    else
                      memset(strData, ' ', strMemNeeded);
                  }
                }
 #ifdef _TEST_ALLOC_FAILURE
              data = newObjArrX(ISVarChar, rows, allocCount++);
 #else
              data = newObjArr(ISVarChar, rows);
 #endif
              if (!data)
                throw ISMemAllocException();
            }
          else
            {
              if (allocStrData)
                {
                  strData = newBuiltinArr(char, strMemNeeded);
                  if (!strData)
                    throw ISMemAllocException();
                }
 #ifdef _TEST_ALLOC_FAILURE
              data = newObjArrX(ISFixedChar, rows, allocCount++);
 #else
              data = newObjArr(ISFixedChar, rows);
 #endif
              if (!data)
                throw ISMemAllocException();
            }

          strNextData = strData;
        }
      else
        {
          data = newBuiltinArr(char, memNeeded);
          if (!data)
            throw ISMemAllocException();

          // for MC IS copy the data to an MC structure to be used later by
          // MCs. We need to copy the non-char data because the IS logic
          // for single columns sort this data in place
          if (mcs_usingme > 0)
          {
            mcis_data = newBuiltinArr(char, memNeeded);
            if (!mcis_data)
              throw ISMemAllocException();
          }
        }
     }
   catch(ISMemAllocException&)
     {
       allAllocated = FALSE;
       freeISMemory();   // get rid of partial allocation
     }

   return allAllocated;
 }

 // freeStrData defaults to TRUE, and causes the memory allocated for char types
 // to be freed (for char types, the data array contains simple objects that
 // reference the actual strings, which are in strData). mergeDatasetsForIUS()
 // allocates a new data array, but reuses ptrs into strData, so we need to
 // avoid freeing strData when this fn is called to remove the old data array.
 void HSColGroupStruct::freeISMemory(NABoolean freeStrData, NABoolean freeMCData)
 {
   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
     {
       sprintf(LM->msg, "Freeing IS memory for column %s", colNames->data());
       LM->Log(LM->msg);
     }

   // used by MC in-memory since a column might have been processed but kept
   // in memory to be used by MCs
   mcis_memFreed = TRUE;

   NADELETEBASIC(nullIndics, STMTHEAP);
   nullIndics = NULL;

   if (DFS2REC::isAnyCharacter(ISdatatype))
     {
       if (DFS2REC::isAnyVarChar(ISdatatype))
         {
           delObjArr(data, ISVarChar)
           if (freeStrData && freeMCData)
             {
               NADELETEBASIC((short*)strData, STMTHEAP);
               strData = NULL;
               if (isCompacted())
                 {
                   NADELETEBASIC((short*)varcharFetchBuffer, STMTHEAP);
                   varcharFetchBuffer = NULL;
                 }
             }
         }
       else
         {
           delObjArr(data, ISFixedChar);
           if (freeStrData && freeMCData)
             {
               NADELETEBASIC((char*)strData, STMTHEAP);
               strData = NULL;
             }
         }
     }
   else
   {
      NADELETEBASIC((char *)data, STMTHEAP);
      if (freeMCData && mcis_data)
         NADELETEBASIC((char *)mcis_data, STMTHEAP);
   }

   data = NULL;
   nextData = NULL;
   strNextData = NULL;

   if (freeMCData)
   {
      mcis_data = NULL;
      mcis_nextData = NULL;
   }
 }

 // Copy constructor
 HSColumnStruct::HSColumnStruct(const HSColumnStruct &src, NAMemory *h)
   {
     colname         = new (h) NAString(src.colname->data(), h);
     externalColumnName = new (h) NAString(src.externalColumnName->data(), h);
     colnum          = src.colnum;
     position        = src.position;
     datatype        = src.datatype;
     nullflag        = src.nullflag;
     charset         = src.charset;
     length          = src.length;
     precision       = src.precision;
     scale           = src.scale;
     caseInsensitive = src.caseInsensitive;
     colCollation    = src.colCollation;
   }


 HSColumnStruct::~HSColumnStruct()
   {
     if (colname != NULL)
       {
         delete colname;
         colname = NULL;
       }
     if (externalColumnName != NULL)
       {
         delete externalColumnName;
         externalColumnName = NULL;
       }
   }

 // Assignment operator
 HSColumnStruct& HSColumnStruct::operator=(const HSColumnStruct& rhs)
   {
     if (this == &rhs)
         return *this;

     NABasicObject::operator=(rhs);

     // Do not do the delete of "colname"; this may cause delete of data that is
     // already deleted; colname is on the STMTHEAP and will be destructed at the
     // end of the statement. [SOL 10-070822-6995]
     colname         = new (STMTHEAP) NAString(rhs.colname->data(), STMTHEAP);
     externalColumnName = new (STMTHEAP) NAString(rhs.externalColumnName->data(), STMTHEAP);
     colnum          = rhs.colnum;
     position        = rhs.position;
     datatype        = rhs.datatype;
     nullflag        = rhs.nullflag;
     charset         = rhs.charset;
     length          = rhs.length;
     precision       = rhs.precision;
     scale           = rhs.scale;
     caseInsensitive = rhs.caseInsensitive;
     colCollation    = rhs.colCollation;

     return *this;
   }

 NABoolean HSColumnStruct::operator==(const HSColumnStruct& other) const
   {
     return ( colnum == other.colnum );
   }

 //
 // METHOD:  addTruncatedColumnReference()
 //
 // PURPOSE: Generates a column reference or a SUBSTRING
 //          on a column reference which truncates the
 //          column to the maximum length allowed in
 //          UPDATE STATISTICS.
 //
 // INPUT:   'qry' - the SQL query string to append the
 //          reference to.
 //          'colInfo' - struct containing datatype info
 //          about the column.
 //
 void HSColumnStruct::addTruncatedColumnReference(NAString & qry)
   {
     HSGlobalsClass *hs_globals = GetHSContext();
     Lng32 maxLengthInBytes = hs_globals->maxCharColumnLengthInBytes;
     bool isOverSized = DFS2REC::isAnyCharacter(datatype) &&
                            (length > maxLengthInBytes);
     if (isOverSized)
       {
         // Note: The result data type of SUBSTRING is VARCHAR, always.
         // But if the column is CHAR, many places in the ustat code are not
         // expecting a VARCHAR. So, we stick a CAST around it to convert
         // it back to a CHAR in these cases.

         NABoolean isFixedChar = DFS2REC::isSQLFixedChar(datatype);
         if (isFixedChar)
           qry += "CAST(";
         qry += "SUBSTRING(";
         qry += externalColumnName->data();
         qry += " FOR ";

         char temp[20];  // big enough for "nnnnnn)"
         sprintf(temp,"%d)", maxLengthInBytes / CharInfo::maxBytesPerChar(charset));
         qry += temp;
         if (isFixedChar)
           {
             qry += " AS CHAR(";
             qry += temp;
             qry += ")";
           }
         qry += " AS ";
         qry += externalColumnName->data();
       }
     else
       qry += externalColumnName->data();
   }


 HSInterval::HSInterval()
       : rowCount_(0), uecCount_(0), gapMagnitude_(0), highFreq_(FALSE),
         MFVrowCount_(0), MFV2rowCount_(0), squareCntSum_(0), origUec_(0),
         origRC_(0), origMFV_(0)
   {}


 HSInterval::~HSInterval()
   {
   }

 FrequencyCounts::FrequencyCounts()
 {
   for (ULng32 i=0; i<FC_NUM_HT_BUCKETS; i++)
     bigfiHT_[i].next_ = 0;

   reset();
 }

 FrequencyCounts::~FrequencyCounts()
 {
   for (ULng32 i=0; i<FC_NUM_HT_BUCKETS; i++)
     {
       struct entry *ent = bigfiHT_[i].next_;

       while (ent)
         {
           struct entry *next = ent->next_;
           NADELETEBASIC(ent, STMTHEAP);
           ent = next;
         }
     }
 }

 FrequencyCounts& FrequencyCounts::operator=(const FrequencyCounts& rhs)
 {
   memmove(fiArr_, rhs.fiArr_, sizeof rhs.fiArr_);
   for (Int32 i=0; i<FC_NUM_HT_BUCKETS; i++)
     {
       bigfiHT_[i].ix_ = rhs.bigfiHT_[i].ix_;
       bigfiHT_[i].value_ = rhs.bigfiHT_[i].value_;

       entry* rhsNextEntry = rhs.bigfiHT_[i].next_;
       entry** nextEntryAddr = &bigfiHT_[i].next_;
       while (rhsNextEntry)
         {
           *nextEntryAddr = newEntry(rhsNextEntry->ix_, rhsNextEntry->value_);
           nextEntryAddr = &(*nextEntryAddr)->next_;
           rhsNextEntry = rhsNextEntry->next_;
         }
       *nextEntryAddr = NULL;
     }

   return *this;
 }

 void FrequencyCounts::reset()
 {
   for (ULng32 i=0; i<FC_NUM_STORED_VALUES; i++) fiArr_[i] = 0;
   resetHT();
 }

 void FrequencyCounts::increment(Int64 i, ULng32 val)
 {
   if (i > 0)
     {
       UInt32 ix =
         (UInt32) ((i > UINT_MAX) ? UINT_MAX : i);

       if (ix < FC_NUM_STORED_VALUES)
         fiArr_[ix] += val;
       else
         incrementHT(ix, val);
     }
 }

 ULng32 FrequencyCounts::operator[](Int64 i)
 {
   if (i==0) return 0;

   UInt32 ix =
     (UInt32) ((i > UINT_MAX) ? UINT_MAX : i);

   if (ix < FC_NUM_STORED_VALUES)
     return fiArr_[ix];
   else
     return lookupHT(ix);
 }

 void FrequencyCounts::mergeTo(FrequencyCounts &f)
 {
   ULng32 i;

   for (i=0; i<FC_NUM_STORED_VALUES; i++)
     f.increment(i, fiArr_[i]);

   for (i=0; i<FC_NUM_HT_BUCKETS; i++)
     if (bigfiHT_[i].ix_>0)
       {
         struct entry *ent = &(bigfiHT_[i]);
         while (ent)
           {
             f.increment(ent->ix_, ent->value_);
             ent = ent->next_;
           }
       }
 }

 FrequencyCounts::entry *FrequencyCounts::hashToBucket(ULng32 ix)
 {
   return &(bigfiHT_[ix % FC_NUM_HT_BUCKETS]);
 }

 ULng32 FrequencyCounts::lookupHT(ULng32 ix)
 {
   struct entry *ent = hashToBucket(ix);
   while (ent)
     {
       if (ix == ent->ix_) return ent->value_;
       ent = ent->next_;
     }

   return 0;
 }

 void FrequencyCounts::resetHT()
 {
   for (ULng32 i=0; i<FC_NUM_HT_BUCKETS; i++)
     {
       struct entry *ent = &(bigfiHT_[i]);
       while (ent)
         {
           ent->ix_ = 0;
           ent->value_ = 0;
           ent = ent->next_;
         }
     }
 }

 FrequencyCounts::entry *FrequencyCounts::newEntry(ULng32 ix, ULng32 value)
 {
   struct entry *ent = new (STMTHEAP) struct entry;

   ent->ix_ = ix;
   ent->value_ = value;
   ent->next_ = 0;

   return ent;
 }

 void FrequencyCounts::incrementHT(ULng32 ix, ULng32 val)
 {
   struct entry *ent = hashToBucket(ix);

   while (ent)
     {
       if (ix == ent->ix_)
         {
           ent->value_ += val;
           return;
         }

       if (ent->next_)
         ent = ent->next_;
       else
         {
           ent->next_ = newEntry(ix, 1);
           return;
         }
     }
 }

 GapKeeper::GapKeeper(Int32 gapsToKeep)
   :gapsToKeep_(gapsToKeep)
 {
   gaps_ = new (STMTHEAP) double[gapsToKeep];
   for (Int32 i=0; i<gapsToKeep; i++)
     gaps_[i] = 0;
 }

 GapKeeper::~GapKeeper()
   {
   }

 double GapKeeper::smallest()
 {
   Int32 i=gapsToKeep_ - 1;
   while (i >= 0 && gaps_[i] == 0)
     i--;
   return (i<0 ? 0 : gaps_[i]);
 }

 Int32 GapKeeper::qualifyingGaps(double minAcceptableGap)
 {
   Int32 count = 0;
   for (Int32 i=0; i<gapsToKeep_; i++)
     {
       if (gaps_[i] >= minAcceptableGap)
         count++;
     }
   return count;
 }

 /**************************************************************************/
 /* METHOD:  insert()                                                      */
 /* PURPOSE: If the passed gap is one of the gapsToKeep_ highest gaps, add */
 /*          it to the gaps_ array in the proper order, else do nothing.   */
 /* PARAMS:  gap(in) -- Gap magnitude to (possibly) add to ordered array.  */
 /* RETCODE: TRUE if the value was inserted, FALSE otherwise.              */
 /**************************************************************************/
 NABoolean GapKeeper::insert(double gap)
 {
   // Gap magnitudes are stored in the array in ascending order. Go through the
   // array until you find the proper location for the new value (if any), insert
   // it, and push everything after it down.
   Int32 i = 0;
   while (i<gapsToKeep_ && gaps_[i] >= gap)
     i++;
   if (i < gapsToKeep_)
     {
       for (Int32 j=gapsToKeep_ - 2; j>=i; j--)
         gaps_[j+1] = gaps_[j];
       gaps_[i] = gap;
       return TRUE;
     }
   else
     return FALSE;
 }


 HSHistogram::HSHistogram(Lng32 intcount,
                          Int64 rowcount,
                          Lng32 gapIntervals,      // target # of gap intervals
                          Lng32 highFreqIntervals, // # extra for high frequency values
                          NABoolean sampleUsed,
                          NABoolean singleIntervalPerUec)
       : gapKeeper_(gapIntervals), intCount_(intcount), currentInt_(0), remRows_(rowcount),
         hasNull_(FALSE), fi_(0), gapIntCount_(0),
         targetGapIntervals_(gapIntervals),
         highFreqIntervalsAllotted_(highFreqIntervals), highFreqIntervalsUsed_(0),
         singleIntervalPerUec_(singleIntervalPerUec),
         maxStddev_(0)
   {
     if(singleIntervalPerUec_) // single distinct value per histogram interval
       {
         step_ = 1;
       }
     else                      // more than one distinct value per histogram interval
       {
         // Don't use intervals provided for high frequency values in step calculation),
         // and also account for gap intervals, and the intervals that precede them,
         // which will be half empty on average.
         step_ = originalStep_
               = rowcount / (intcount - highFreqIntervals - (Lng32)(gapIntervals * 1.5));
       }

     // Calculate frequency count required to establish a separate interval for
     // a single distinct value.
     highFreqThreshold_ =
         (Int64)(rowcount * (CmpCommon::getDefaultNumeric(USTAT_FREQ_SIZE_PERCENT) / 100));

     // In addition to the number of intervals requested, we need 2 more for
     // interval 0 and possibly the NULL interval. We also add extras based
     // on the number of gap intervals we want to keep. These will be used
     // as necessary to maintain a stable step size as low-frequency intervals
     // are used for gaps.
     maxAllowedInts_ = intCount_ + 10 * gapIntervals;
     // Put on global heap, NABasicObject and MX heaps do not
     // work with arrays.
     intArry_ = new HSInterval[maxAllowedInts_ + 2];

     if (sampleUsed &&
         CmpCommon::getDefault(USTAT_FORCE_MOM_ESTIMATOR) == DF_OFF)
       {
         // sampling is being used and the new estimator, so we
         // need to maintain frequency counts
         // Put on global heap, NABasicObject and MX heaps do not
         // work with arrays.
         fi_ = new FrequencyCounts[maxAllowedInts_ + 2];
       }

     // If handling gaps, set up the gap multiplier that determines which gaps
     // are big enough to consider.
     if (targetGapIntervals_ > 0)
       {
         HSGlobalsClass *hs_globals = GetHSContext();
         HSLogMan *LM = HSLogMan::Instance();
         float sampleRatio = (float) hs_globals->samplePercentX100 / 10000;
           // Sample percent is stored in HISTOGRAMS table as percent * 100.
         gapMultiplier_ = CmpCommon::getDefaultNumeric(USTAT_GAP_SIZE_MULTIPLIER);
         if (sampleRatio > 0 && sampleRatio < 1)
           gapMultiplier_ += ((double)(1 - sampleRatio));
         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "%f used as gap multiplier", gapMultiplier_);
             LM->Log(LM->msg);
           }
       }
   }

 HSHistogram::~HSHistogram()
   {
     if (intArry_ != NULL)
       {
         delete [] intArry_;
         intArry_ = NULL;
       }
     deleteFiArray();
   }

 void HSHistogram::deleteFiArray()
   {
     if (fi_ != NULL)
       {
         delete [] fi_;
         fi_ = NULL;
       }
   }


 Lng32 HSHistogram::getLowValue(HSDataBuffer &lval, NABoolean addParen)
   {
     Lng32 retcode = 0;

     lval = intArry_[0].boundary_;
     if (addParen)
       {
         retcode = lval.addParenthesis();
         HSHandleError(retcode);
       }

     return retcode;
   }

 Lng32 HSHistogram::getHighValue(HSDataBuffer &hval, NABoolean addParen)
   {
     Lng32 i;
     Lng32 retcode = 0;

     //The high value for the histogram table may not be NULL. Although,
     //it is OK for histogram_interval table.
     if (hasNull_)
       i = currentInt_- 1;
     else
       i = currentInt_;

     hval = intArry_[i].boundary_;
     if (addParen)
       {
         retcode = hval.addParenthesis();
         HSHandleError(retcode);
       }

     return retcode;
   }


 /***********************************************/
 /* METHOD:  processIntervalValues()            */
 /* PURPOSE: processes entire rowset, while     */
 /*          maintaining the INTERVAL boundary  */
 /*          information. Each interval will    */
 /*          have the #rows, #uec, low/hi values*/
 /* PARAMS:  boundaryRowSet - rowset containing */
 /*                           data values.      */
 /*          group - group intervals are being  */
 /*                  processed for.             */
 /*                       external format string*/
 /*          rowsInSet - Accumulation of how    */
 /*                      many data records have */
 /*                      been processed.        */
 /*          currentGapAvg -- Average gap       */
 /*                           size so far.      */
 /* RETCODE:  0 - successful                    */
 /*          -1 - failure                       */
 /* ASSUMPTIONS: The data is SORTED(increasing) */
 /* NOTES:   bndry:   )[----](----]...(----]    */
 /*          int#     0   1     2  ...   n      */
 /***********************************************/
 Lng32 HSHistogram::processIntervalValues(boundarySet<myVarChar>* boundaryRowSet,
                                         HSColGroupStruct *group,
                                         Int64 &rowsInSet,
                                         double currentGapAvg)
   {
     Lng32 retcode = 0;
     ULng32 sumSize = 0;
     NABoolean maxLongLimit = FALSE;
     NABoolean computeVarCharSize = FALSE;
     HSLogMan *LM = HSLogMan::Instance();

     // is it single group and data type is varchar? set it to TRUE.
     if (group->computeAvgVarCharSize())
       computeVarCharSize = TRUE;
     rowsInSet = 0;
     Lng32 indexOfLastNonNullRow =
                   (boundaryRowSet->nullInd[boundaryRowSet->size-1] == -1
                     ? boundaryRowSet->size - 2
                     : boundaryRowSet->size - 1);
     Lng32 colCount = group->colCount;
     NABoolean allColumnValsNULLs = FALSE;
     if(colCount > 1)
     {
 	char * value = (char *)&(boundaryRowSet->data[boundaryRowSet->size-1]);
 	value += sizeof(short);

 	NAWString boundaryVal((NAWchar *)value);
 	NAWString nullStr(L"NULL");
 	for (Lng32 colNum=1; colNum < colCount; colNum++)
 		nullStr.append(L",NULL");
 	if(boundaryVal == nullStr)
 	{
 		allColumnValsNULLs = TRUE;
 		indexOfLastNonNullRow = boundaryRowSet->size - 2;
 	}
     }

     for (Lng32 i=0; i < boundaryRowSet->size; i++)
       {
         rowsInSet += boundaryRowSet->dataSum[i];
         // varchar type AND max long limit not reached.
         if (computeVarCharSize AND (!maxLongLimit) )
         {
           // Not a NULL value.
           if (boundaryRowSet->avgVarCharNullInd[i] != -1)
           {
             sumSize = sumSize + (ULng32)(boundaryRowSet->avgVarCharSize[i] *
                                  boundaryRowSet->dataSum[i]);
             if (sumSize >= INT_MAX)
             {
               maxLongLimit = TRUE;
               group->avgVarCharSize = (double)sumSize / (double)rowsInSet;
             }
           }
         } // computeVarCharSize

         if ((boundaryRowSet->nullInd[i] == -1) ||
                 (allColumnValsNULLs && i == boundaryRowSet->size-1))
           {
             //10-030703-7604: It is expected that once a NULL value has been
             //processed, no other rows follow.
             HS_ASSERT(NOT hasNullInterval());
             addNullInterval(boundaryRowSet->dataSum[i], group->colCount);
           }
         else
           {
             // Determine the gap magnitude associated with this distinct value
             // and whether or not it is a significant one, and include this in
             // the information passed to addIntervalData. Note that if an interval
             // qualifies as high frequency, it will not be considered as a gap
             // interval.
             double currGapMagnitude = boundaryRowSet->gapMagnitude[i];
             NABoolean bigGap = currGapMagnitude > currentGapAvg * gapMultiplier_
                                  && boundaryRowSet->dataSum[i] <= highFreqThreshold_
                                  && gapKeeper_.insert(currGapMagnitude);
             retcode = addIntervalData(boundaryRowSet->data[i],
                                       group,
                                       boundaryRowSet->dataSum[i],
                                       bigGap,
                                       currGapMagnitude,
                                       i == indexOfLastNonNullRow);
           }
       }

     if ( (computeVarCharSize) AND (group->avgVarCharSize == -1) )
     {
       group->avgVarCharSize = (double)sumSize / (double)rowsInSet;

     }
     if (LM->LogNeeded())
       {
         char rowStr[30];

         convertInt64ToAscii(rowsInSet, rowStr);
         sprintf(LM->msg, "\tProcessed %5s rows, %5d uec, retcode = %5d",
                              rowStr,
                              boundaryRowSet->size,
                              retcode);
         LM->Log(LM->msg);
       }

     return retcode;
   }

 void HSHistogram::addNullInterval(const Int64 nullCount, const Lng32 colCount)
   {
     NAWString nullTxt = WIDE_("NULL");
     if(colCount > 1)
     {
       for(int i = 1; i < colCount; i++)
         nullTxt.append(WIDE_(", NULL"));
     }

     HSDataBuffer nullVal(nullTxt.data());

     if (currentInt_ == 0)          // first datapoint
       intArry_[0].boundary_ = nullVal;
     currentInt_++;
     double nullCountd                   = (double) nullCount;
     intArry_[currentInt_].rowCount_     = nullCount;
     intArry_[currentInt_].uecCount_     = 1;
     intArry_[currentInt_].squareCntSum_ = nullCountd * nullCountd;
     intArry_[currentInt_].boundary_     = nullVal;
     if (fi_) fi_[currentInt_].increment(nullCount);

     hasNull_ = TRUE;
   }

 Lng32 HSHistogram::updateMCInterval(const HSDataBuffer &lowval,
                                    const HSDataBuffer &hival)
   {
     intArry_[0].boundary_ = lowval;
     intArry_[1].boundary_ = hival;
     return 0;
   }

 /***********************************************/
 /* METHOD:  getTotalCounts()                   */
 /* PURPOSE: Determine the SUM(rowcount) and the*/
 /*          SUM(uec).                          */
 /* IN/OUT:  rowCount = SUM(rowcount)           */
 /*          uecCount = SUM(uec)                */
 /* NOTES:   bndry:   )[----](----]...(----]    */
 /*          int#     0   1     2  ...   n      */
 /***********************************************/
 void HSHistogram::getTotalCounts(Int64 &rowCount, Int64 &uecCount)
   {
     rowCount = 0;
     uecCount = 0;

     //Since interval 0 is exclusive, the iterator must
     //begin at 1. Since the last interval is inclusive,
     //we must process the last interval.
     for (Lng32 i=1; i <= currentInt_; i++)
       {
         rowCount += intArry_[i].rowCount_;
         uecCount += intArry_[i].uecCount_;
       }
   }


 // Get the original total counts
 void HSHistogram::getOrigTotalCounts(Int64 &rowCount, Int64 &uecCount)
   {
     rowCount = 0;
     uecCount = 0;

     //Since interval 0 is exclusive, the iterator must
     //begin at 1. Since the last interval is inclusive,
     //we must process the last interval.
     for (Lng32 i=1; i <= currentInt_; i++)
       {
         rowCount += getIntOrigRC(i);
         uecCount += getIntOrigUec(i);
       }
   }


 /***********************************************/
 /* METHOD:  getTotalUec()                      */
 /* PURPOSE: Determine the SUM(uec)             */
 /* RETURN:  SUM(uec)                           */
 /* NOTES:   bndry:   )[----](----]...(----]    */
 /*          int#     0   1     2  ...   n      */
 /***********************************************/
 Int64 HSHistogram::getTotalUec()
   {
     Int64 uec = 0;

     //Since interval 0 is exclusive, the iterator must
     //begin at 1. Since the last interval is inclusive,
     //we must process the last interval.
     for (Lng32 i=1; i <= currentInt_; i++)
       {
         uec += intArry_[i].uecCount_;
       }
     return uec;
   }


 /***********************************************/
 /* METHOD:  getTotalRowCount()                 */
 /* PURPOSE: Determine the SUM(rowcount)        */
 /* RETURN:  SUM(rowcount)                      */
 /* NOTES:   bndry:   )[----](----]...(----]    */
 /*          int#     0   1     2  ...   n      */
 /***********************************************/
 Int64 HSHistogram::getTotalRowCount()
   {
     Int64 rowCount = 0;

     //Since interval 0 is exclusive, the iterator must
     //begin at 1. Since the last interval is inclusive,
     //we must process the last interval.
     for (Lng32 i=1; i <= currentInt_; i++)
       {
         rowCount += intArry_[i].rowCount_;
       }
     return rowCount;
   }

 Lng32 HSHistogram::getParenthesizedIntBoundary(Lng32 intNum, HSDataBuffer &intBoundary)
   {
     Lng32 retcode = 0;
     HS_ASSERT(intNum >= 0);
     HS_ASSERT(intNum <= currentInt_);

     intBoundary = intArry_[intNum].boundary_;
     retcode = intBoundary.addParenthesis();
     HSHandleError(retcode);

     return retcode;
   }

 Lng32 HSHistogram::getParenthesizedIntMFV(Lng32 intNum, HSDataBuffer &intMostFreqVal)
   {
     Lng32 retcode = 0;
     HS_ASSERT(intNum >= 0);
     HS_ASSERT(intNum <= currentInt_);

     intMostFreqVal = intArry_[intNum].mostFreqVal_;
     retcode = intMostFreqVal.addParenthesis();
     HSHandleError(retcode);

     return retcode;
   }

 // initialize static variables of HSGlobalsClass
 THREAD_P COM_VERSION HSGlobalsClass::schemaVersion = COM_VERS_UNKNOWN;
 THREAD_P Lng32 HSGlobalsClass::autoInterval = 0;

 Int64 HSGlobalsClass::getMinRowCountForSample()
 {
   return (Int64)CmpCommon::getDefaultLong(USTAT_MIN_ROWCOUNT_FOR_SAMPLE);
 }

 Int64 HSGlobalsClass::getMinRowCountForLowSample()
 {
   return (Int64)CmpCommon::getDefaultLong(USTAT_MIN_ROWCOUNT_FOR_LOW_SAMPLE);
 }

 void HSHistogram::logAll(const char* title)
 {
   HSLogMan *LM = HSLogMan::Instance();

   if (!LM->LogNeeded()) return;

   LM->Log(title);

   char buf[50];

   sprintf(buf, "Total RC=%d, Total UEC=%d", (Lng32)getTotalRowCount(), (Lng32)getTotalUec());
   LM->Log(buf);

   logIntervals();
 }

 /****************************************************************************/
 /* METHOD:  logIntervals()                                                  */
 /* PURPOSE: Logs the current state of the intervals of the histogram. If    */
 /*          curr and lookahead are non-negative, the function is being used */
 /*          to trace the intermediate steps in merging undersized gap       */
 /*          intervals into adjacent intervals (removeLesserGapIntervals()). */
 /*          In this case, the current and lookahead interval for the merge  */
 /*          procedure are indicated in addition to displaying the basic     */
 /*          information for each interval.                                  */
 /* PARAMS:  curr(in)      -- Index of current interval, -1 if not needed.   */
 /*          lookahead(in) -- Index of interval being evaluated, -1 if not   */
 /*                           needed.                                        */
 /****************************************************************************/
 void HSHistogram::logIntervals(Lng32 curr, Lng32 lookahead)
 {
   HSLogMan *LM = HSLogMan::Instance();

   if (!LM->LogNeeded())
      return;

   const Int32 MAX_CHARS = 20;
   char boundary[MAX_CHARS+1];
   char mostFreqVal[MAX_CHARS+1];

   sprintf(LM->msg, "   |int|  rc | uec | boundary | mfv#|2mfv#| mfv value|  gap size|");
   LM->Log(LM->msg);

   Int32 numBoundaryChars;
   wchar_t* wchPtr;
   for (Lng32 i=0; i<=getNumIntervals(); i++)
     {
       wchPtr = (wchar_t*)(intArry_[i].boundary_.data());
       numBoundaryChars = MINOF(MAX_CHARS, intArry_[i].boundary_.numChars());
       for (Lng32 k=0;k<numBoundaryChars; k++)
         boundary[k] = (char)(*wchPtr++);
       boundary[numBoundaryChars] = '\0';

       wchPtr = (wchar_t*)(intArry_[i].mostFreqVal_.data());
       numBoundaryChars = MINOF(MAX_CHARS, intArry_[i].mostFreqVal_.numChars());
       for (Lng32 k=0;k<numBoundaryChars; k++)
         mostFreqVal[k] = (char)(*wchPtr++);
       mostFreqVal[numBoundaryChars] = '\0';

       // Note that if curr and lookahead are negative, they will match no intvl
       // value and the C-> and L-> indicators will not appear in the output.
       sprintf(LM->msg, "%3s|%3d|%5d|%5d|%10s|%5d|%5d|%10s|%10.2f|",
              i == curr ? "C->" : (i == lookahead ? "L->" : " "),
              i,
              (Int32)intArry_[i].rowCount_,  // blows up without the cast
              (Int32)intArry_[i].uecCount_,
              boundary,
              (Int32)intArry_[i].MFVrowCount_,
              (Int32)intArry_[i].MFV2rowCount_,
              mostFreqVal,
              intArry_[i].gapMagnitude_
              );
       LM->Log(LM->msg);
     }
   LM->Log("==================================================");
 }

 void HSGlobalsClass::logDiagArea(const char* title)
 {
   HSLogMan *LM = HSLogMan::Instance();
   if (!LM->LogNeeded())
     return;

   Lng32 warnings = diagsArea.getNumber(DgSqlCode::WARNING_);
   Lng32 errors = diagsArea.getNumber(DgSqlCode::ERROR_);

   sprintf(LM->msg, "%s, HSGlobalsClass::diagsArea, #errors=%d, #warnings=%d", title, errors, warnings);
   LM->Log(LM->msg);
 }

 /**************************************************************************/
 /* METHOD:  sumFrequencies()                                              */
 /* PURPOSE: For each value count up to FC_NUM_STORED_VALUES (larger counts are*/
 /*          in a hash table instead of an array indexed by count), sum    */
 /*          the frequencies in the fi array into a single FrequencyCounts */
 /*          object that spans all intervals. Each element in fi           */
 /*          represents an interval, and stores an array in which the      */
 /*          value at each index is the number of values that occur that   */
 /*          many times. For example, fi[4][3] == 8 indicates that in      */
 /*          interval 4, there are 8 values that occur 3 times.            */
 /*          NOTE: This function is only used for logging and is called    */
 /*          before and after the merging of intervals for gap encoding    */
 /*          as a rough check (frequency counts over FC_NUM_STORED_VALUES are  */
 /*          not validated, and accurate distribution of counts within     */
 /*          intervals is not determined) on the correctness of merging    */
 /*          frequency counts as intervals are merged.                     */
 /* PARAMS:  fc(out)         -- FrequencyCounts object that accumulates the*/
 /*                             sum of the frequencies in fi.              */
 /*          maxInterval(in) -- Index of interval being evaluated.         */
 /*          fi(in)          -- Array of frequency counts to sum.          */
 /**************************************************************************/
 static void sumFrequencies(FrequencyCounts& fc, Int32 maxInterval, FrequencyCounts* fi)
 {
   if (!fi)
     return;

   for (Int32 i=1; i<=maxInterval; i++)
     for (Int32 valCount=0; valCount<FC_NUM_STORED_VALUES; valCount++)
       {
         fc.increment(valCount, fi[i][valCount]);
       }
 }

 /**************************************************************************/
 /* METHOD:  compareFC()                                                   */
 /* PURPOSE: Compare the two frequency counts to determine if for each     */
 /*          incidence count, there are the same number of values in each  */
 /*          that occur that many times. See the comments for the          */
 /*          sumFrequencies() function above.                              */
 /* PARAMS:  fc1,fc2(in) -- The FrequencyCounts objects to compare.        */
 /* RETURN:  TRUE if a difference is found.                                */
 /**************************************************************************/
 NABoolean compareFC(FrequencyCounts& fc1, FrequencyCounts& fc2)
 {
   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
     {
       sprintf(LM->msg,
               "Comparing overall (sum across intervals) frequency counts (only frequencies up to %d)",
               FC_NUM_STORED_VALUES);
       LM->Log(LM->msg);
     }
   NABoolean differenceFound = FALSE;
   for (Int32 valCount=0; valCount<FC_NUM_STORED_VALUES; valCount++)
     {
       if (fc1[valCount] != fc2[valCount])
         {
           differenceFound = TRUE;
           if (LM->LogNeeded())
             {
               sprintf(LM->msg,
                       "Difference in number of values occurring %d times: %d vs. %d",
                       valCount, fc1[valCount], fc2[valCount]);
               LM->Log(LM->msg);
             }
         }
     }
   if (!differenceFound && LM->LogNeeded())
     {
       sprintf(LM->msg,
               "No difference found in FrequencyCounts objects comparing frequencies up to %d",
               FC_NUM_STORED_VALUES);
       LM->Log(LM->msg);
     }

   return differenceFound;
 }

 /**************************************************************************/
 /* METHOD:  removeLesserGapIntervals()                                    */
 /* PURPOSE: Now that the true overall gap average is known, revisit the   */
 /*          gap intervals we created, and keep the gapIntCount ones with  */
 /*          the greatest gap magnitude. The rest are merged into adjacent */
 /*          intervals, unless that would create an interval of excessive  */
 /*          height.                                                       */
 /* PARAMS:  trueGapAvg(in) -- The actual gap average, after all gaps have */
 /*                            been seen.                                  */
 /**************************************************************************/
 void HSHistogram::removeLesserGapIntervals(double trueGapAvg)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Int32 log_mergedGaps=0;             // for logging use only
   FrequencyCounts *FCbeforePtr = 0; // logging only; heavyweight object, so use ptr
                                     //   to avoid instantiation unless necessary

   // The minimum acceptable gap magnitude is found using the multiplier specified
   // by USTAT_GAP_SIZE_MULTIPLIER. However, we bump up the minimum enough to
   // limit the intervals selected to the number we want to keep.
   double minGapToKeep = gapMultiplier_ * trueGapAvg;
   if (LM->LogNeeded())
     {
       sprintf(LM->msg, "min acceptable gap based on gap average = %f", minGapToKeep);
       LM->Log(LM->msg);
       if (fi_)
         {
           // FCbeforePtr is used to compare overall frequency counts with those
           // obtained after merging intervals later in this function, and is
           // deleted immediately after that.
           // Create on global heap - FrequencyCounts is not NABasicObject.
           FCbeforePtr = new FrequencyCounts;
           sumFrequencies(*FCbeforePtr, currentInt_, fi_);
         }
     }
   if (gapKeeper_.smallest() > minGapToKeep)
     {
       minGapToKeep = gapKeeper_.smallest();
       if (LM->LogNeeded())
         {
           sprintf(LM->msg, "min acceptable gap to accommodate number of gap intervals  = %f", minGapToKeep);
           LM->Log(LM->msg);
         }
     }
   if (LM->LogNeeded() && targetGapIntervals_ < gapIntCount_)
     {
       sprintf(LM->msg, "Too many gaps collected; will try to merge back the "
                         "%d that are less than %f",
                         gapIntCount_ - gapKeeper_.qualifyingGaps(minGapToKeep), minGapToKeep);
       LM->Log(LM->msg);
     }

   Lng32 keptIntInx = 1;
   Lng32 lookaheadIntInx = 2;  // intArry_[1] can't be gap interval
   // currentInt_ is the index of the highest interval used.
   Lng32 highestIntInx = currentInt_;
   Lng32 mergeTargetIntInx;
   double gapMagnitudeOfLookahead;
   NABoolean firstTime = TRUE;   // 1st time through loop
   while (lookaheadIntInx <= highestIntInx)
     {
       // If more than 25 intervals, only log the initial interval state (output
       // too voluminous otherwise).
       if (LM->LogNeeded() && (currentInt_ <= 25 || firstTime))
         {
           firstTime = FALSE;
           logIntervals(keptIntInx, lookaheadIntInx);
         }
       gapMagnitudeOfLookahead = intArry_[lookaheadIntInx].gapMagnitude_;
       //@ZXgap -- try merging undersized non-gap intervals as well as small gap intervals
       //if (gapMagnitudeOfLookahead > 0 && gapMagnitudeOfLookahead < minGapToKeep)
       if (gapMagnitudeOfLookahead < minGapToKeep    // includes non-gap intervals...
           && !intArry_[lookaheadIntInx].highFreq_)  // ...unless they are high frequency
         {
           // Merge this small gap with an adjacent interval, unless neither
           // adjacent interval is suitable.
           mergeTargetIntInx = mergeInterval(lookaheadIntInx, keptIntInx, minGapToKeep);
           if (mergeTargetIntInx > 0)
             {
               // Also merge frequency counts if being maintained.
               if (fi_)
                 fi_[lookaheadIntInx].mergeTo(fi_[mergeTargetIntInx]);
               log_mergedGaps++;
               lookaheadIntInx++;
               continue;
             }
         }

       // If the code above did not find an undersized gap and successfully
       // merge it with another, we reach this point. This code handles non-gap
       // intervals, including the null interval if it exists, large gap intervals
       // that will be kept, and small gap intervals that could not be merged.
       keptIntInx++;
       if (keptIntInx < lookaheadIntInx)
         {
           intArry_[keptIntInx] = intArry_[lookaheadIntInx];
           if (fi_)
             fi_[keptIntInx] = fi_[lookaheadIntInx];
         }
       lookaheadIntInx++;
 #if 0 // Set to #if 1 for more logging info for gaps.
       if (LM->LogNeeded())
         {
           if (lookaheadIntInx-1 == highestIntInx && hasNullInterval())
             {
               sprintf(LM->msg, "Null interval moved from %d to %d",
                                lookaheadIntInx-1, keptIntInx);
               LM->Log(LM->msg);
             }
           else if (gapMagnitudeOfLookahead >= minGapToKeep)
             {
               sprintf(LM->msg, "Big gap interval moved from %d to %d",
                                lookaheadIntInx-1, keptIntInx);
               LM->Log(LM->msg);
             }
           else if (gapMagnitudeOfLookahead == 0)
             {
               sprintf(LM->msg, "Non-gap interval moved from %d to %d",
                                lookaheadIntInx-1, keptIntInx);
               LM->Log(LM->msg);
             }
           else
             {
               sprintf(LM->msg, "Unmerged small gap interval moved from %d to %d",
                                lookaheadIntInx-1, keptIntInx);
               LM->Log(LM->msg);
             }
         }
 #endif
     }

 #if 0 // Set to #if 1 for more logging info for gaps.
     if (LM->LogNeeded())
     {
       logIntervals(keptIntInx, lookaheadIntInx);
       sprintf(LM->msg, "Of %d original gap intervals, %d were under the gap "
                        "magnitude threshold. %d intervals were able to be "
                        "merged into adjacent intervals (some of which may have "
                        "also been undersized gap intervals)",
                        gapIntCount_, gapIntCount_ - targetGapIntervals_, log_mergedGaps);
       LM->Log(LM->msg);
       sprintf(LM->msg, "After eliminating smaller gap intervals, there are "
                        "now %d instead of %d total intervals",
                        keptIntInx + 1, highestIntInx + 1); // 1 extra for interval 0
       LM->Log(LM->msg);
       if (fi_)
         {
           FrequencyCounts FCafter;
           sumFrequencies(FCafter, keptIntInx, fi_);
           compareFC(*FCbeforePtr, FCafter);
         }
     }
 #endif
   delete FCbeforePtr;

   // Update the stored interval count to reflect the removed intervals.
   currentInt_ = keptIntInx;

   GetHSContext()->checkTime("after adjusting/eliminating gap intervals");
 }


 /**************************************************************************/
 /* METHOD:  mergeInterval()                                               */
 /* PURPOSE: Merges the specified interval into an adjacent one if         */
 /*          possible. It will not be merged if it cannot be done without  */
 /*          producing an interval that exceeds the target bucket height   */
 /*          In addition, it cannot be merged with a gap interval that we  */
 /*          intend to keep, or a special high frequency interval.         */
 /* PARAMS:  intervalToMerge(in) -- Index of the interval we intend to     */
 /*                                 merge into another.                    */
 /*          prevInterval(in)    -- Index of the interval preceding the    */
 /*                                 one to merge. Won't necessarily be 1   */
 /*                                 less, there may be one or more removed */
 /*                                 intervals between them.                */
 /*          gapThreshold(in)    -- Only allow merges with gap intervals   */
 /*                                 having gap magnitude less than this.   */
 /* RETCODE: Index of the interval that was merged into, or 0 if a merge   */
 /*          could not be done.                                           */
 /**************************************************************************/
 Lng32 HSHistogram::mergeInterval(const Lng32 intervalToMerge,
                                 const Lng32 prevInterval,
                                 const double gapThreshold)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Lng32 intervalToMergeInto;
   Lng32 nextInterval = intervalToMerge + 1;

   // Set highest interval to currentInt_ less the NULL interval.
   Lng32 highestInterval = currentInt_;
   if (hasNull_) highestInterval--; // Can't merge NULL interval.
   if (intervalToMerge > highestInterval) return 0;
   // Determine whether the two candidate intervals are eligible.
   NABoolean excludeNext = nextInterval > highestInterval ||
                           intArry_[nextInterval].gapMagnitude_ >= gapThreshold ||
                           intArry_[nextInterval].highFreq_;
   NABoolean excludePrev = prevInterval < 1 ||  // actually, should never be <1
                           intArry_[prevInterval].gapMagnitude_ >= gapThreshold ||
                           intArry_[prevInterval].highFreq_;

   // Pick the adjacent interval that meets the criteria, and would have the
   // lowest resulting row count.
   if (excludeNext)
     {
       if (excludePrev)
         {
           // Neither previous nor next interval is suitable for merging; leave
           // the small gap interval as is.
           if (LM->LogNeeded())
             {
               sprintf(LM->msg,
                       "Could not merge interval %d into either adjacent interval",
                       intervalToMerge);
               LM->Log(LM->msg);
             }
           return 0;  // neither adjacent interval can be merged into
         }
       else
         // Can't merge into next, but previous is ok.
         intervalToMergeInto = prevInterval;
     }
   else if (excludePrev)
     // Can't merge into previous, but next is ok.
     intervalToMergeInto = nextInterval;
   else if (intArry_[prevInterval].rowCount_ <= intArry_[nextInterval].rowCount_)
     // Previous and next are both ok, but previous is smaller.
     intervalToMergeInto = prevInterval;
   else
     // Previous and next are both ok, but next is smaller.
     intervalToMergeInto = nextInterval;

   // Do the actual merge here, but only if combined interval would not exceed
   // our bucket height.
   Int64 combinedRowCount = intArry_[intervalToMergeInto].rowCount_ +
                            intArry_[intervalToMerge].rowCount_;
   if (combinedRowCount > originalStep_)
     {
       if (LM->LogNeeded())
         {
           sprintf(LM->msg, "Could not merge interval %d into interval %d; "
                            "combined interval too large",
                            intervalToMerge, intervalToMergeInto);
           LM->Log(LM->msg);
         }
       return 0;
     }
   else
     {
       intArry_[intervalToMergeInto].rowCount_ = combinedRowCount;
       intArry_[intervalToMergeInto].uecCount_ += intArry_[intervalToMerge].uecCount_;
       intArry_[intervalToMergeInto].squareCntSum_ += intArry_[intervalToMerge].squareCntSum_;
       intArry_[intervalToMergeInto].gapMagnitude_ = 0;
       mergeMFVs(intervalToMergeInto, intervalToMerge);
       if (intervalToMergeInto < intervalToMerge)
         intArry_[intervalToMergeInto].boundary_ = intArry_[intervalToMerge].boundary_;
       if (LM->LogNeeded())
         {
           sprintf(LM->msg, "Merged interval %d into interval %d",
                            intervalToMerge, intervalToMergeInto);
           LM->Log(LM->msg);
         }
       return intervalToMergeInto;
     }
 }

 /**************************************************************************/
 /* METHOD:  mergeMFVs()                                                   */
 /* PURPOSE: Merges the most frequent values of two intervals.             */
 /* PARAMS:  to(in)   -- Index of the interval to be merged into           */
 /*          from(in) -- Index of the interval being merged from.          */
 /**************************************************************************/
 void HSHistogram::mergeMFVs(const Lng32 to, const Lng32 from)
 {
   if (intArry_[from].MFVrowCount_ > intArry_[to].MFVrowCount_)
   {
     // Interval being merged from has larger MFV.
     Int64 saveMfvCount = intArry_[to].MFVrowCount_;
     intArry_[to].MFVrowCount_ = intArry_[from].MFVrowCount_;
     intArry_[to].mostFreqVal_ = intArry_[from].mostFreqVal_;

     // Now check if MFV2 of 'from' interval larger than MFV of 'to'.
     // do the 2mfv update only the rowCount_ > uecCount_ in the 'to' interval.
     if ( intArry_[to].rowCount_ > intArry_[to].uecCount_ ) {
        if (intArry_[from].MFV2rowCount_ > saveMfvCount)
             intArry_[to].MFV2rowCount_ = intArry_[from].MFV2rowCount_;
        else intArry_[to].MFV2rowCount_ = saveMfvCount;
     }
   }
   else
   {
     // Interval being merged into has larger MFV. Check if MFV2 of 'to'
     // should be changed to MFV of 'from'. Do the 2mfv update only when
     // rowCount_ > uecCount_ in the 'to' interval.
     if ( intArry_[to].rowCount_ > intArry_[to].uecCount_ &&
          intArry_[to].MFV2rowCount_ < intArry_[from].MFVrowCount_
        )
          intArry_[to].MFV2rowCount_ = intArry_[from].MFVrowCount_;
   }
 }

 // Adjustmen of MVV and 2MFV for interval i, when the rowcount and uec of the
 // interval are about to be set to newEstRow and newEstUec respectively.
 void HSHistogram::adjustMFVand2MFV(const Lng32 i, double newEstRow, double newEstUec)
 {
    if ( newEstRow > newEstUec && newEstUec > 1.0 ) {
      double ratio = newEstRow / getIntOrigRC(i);
      setIntMFVRowCount(i,
                            MAXOF(Int64(newEstRow / newEstUec) + 1,              // Set MFV to the max of the lower bound of the MFV
                                  (Int64)((double)getIntOrigMFV(i)*ratio) // and the scaled original MFV (by the ratio
                                                                          // of increase in rowcount of the interval).
                                 )
                       );
      setIntMFV2RowCount(i, (Int64)((double)getIntMFVRowCount(i)/2.0));   // Following the Zipf's law, 2MFV set to half of the MFV.
    } else
      if ( newEstUec == 1.0 )
        setIntMFV2RowCount(i, 0);
 }

 void HSHistogram::maintainEndIntervalForIUS(float avgRCPerInterval, Lng32 intNum)
 {
 }

 // Hash function used for jitLogThresholdHash.
 static ULng32 hashString(const NAString& str) { return str.hash(); }

 THREAD_P NABoolean HSGlobalsClass::performISForMC_ = FALSE;

 HSGlobalsClass::HSGlobalsClass(ComDiagsArea &diags)
   : catSch(new(STMTHEAP) NAString(STMTHEAP)),
     isHbaseTable(FALSE),
     isHiveTable(FALSE),
     hasOversizedColumns(FALSE),
     user_table(new(STMTHEAP) NAString(STMTHEAP)),
     numPartitions(0),
     hstogram_table(new(STMTHEAP) NAString(STMTHEAP)),
     hsintval_table(new(STMTHEAP) NAString(STMTHEAP)),
     hsperssamp_table(new(STMTHEAP) NAString(STMTHEAP)),
     hssample_table(new(STMTHEAP) NAString(STMTHEAP)),
     statstime(new(STMTHEAP) NAString(STMTHEAP)),

     externalSampleTable(FALSE),
     tableType(UNKNOWN_TYPE),tableFormat(UNKNOWN),

     actualRowCount(0), sampleRowCount(0),
     rowChangeCount(-1),   // -1 indicates not determined yet
     intCount(0), dupGroup(0),
     errorFile(STMTHEAP),

     groupCount(0), singleGroupCount(0), singleGroup(NULL), multiGroup(NULL),
     parserError(ERROR_SYNTAX), errorCount(0), diagsArea(diags),
     sampleTableUsed(FALSE),
     samplingUsed(FALSE),

     optFlags(0), sampleOption(new(STMTHEAP) NAString(STMTHEAP)),
     sampleValue1(0), sampleValue2(0), sampleOptionUsed(FALSE),
     unpartitionedSample(FALSE), objDef(NULL),
     statsNeeded_(TRUE),

     // histogram automation
     sampleSeconds(0), columnSeconds(0), samplePercentX100(0),
     allMissingStats(FALSE),
     // other
     requestedByCompiler(FALSE),

     //iusSampleInMem(NULL),
     iusSampleDeletedInMem(NULL),
     iusSampleInsertedInMem(NULL),
     sampleIExists_(FALSE),
     PST_IUSrequestedSampleRows_(NULL),
     PST_IUSactualSampleRows_(NULL),
     sampleRateAsPercetageForIUS(0),
     minRowCtPerPartition_(-1),
     sample_I_generated(FALSE),
     PSRowUpdated(FALSE),
     jitLogThreshold(0),
     stmtStartTime(0),
     jitLogOn(FALSE),
     isUpdatestatsStmt(FALSE),
     maxCharColumnLengthInBytes(ActiveSchemaDB()->getDefaults().
                getAsLong(USTAT_MAX_CHAR_COL_LENGTH_IN_BYTES)),
     hbaseCacheSizeCQDsSet_(FALSE),
     hiveMaxStringLengthCQDSet_(FALSE)
   {
     // Must add the context first in the constructor.
     contID_ = AddHSContext(this);

     // Save parserflags
     SQL_EXEC_GetParserFlagsForExSqlComp_Internal(savedParserFlags);

     // Special SQLParser flags to deal with namespaces and funny signs like '@'
     // and security
     SQL_EXEC_SetParserFlagsForExSqlComp_Internal(
       //dmALLOW_SPECIALTABLETYPE | dmALLOW_PHONYCHARACTERS | dmINTERNAL_QUERY_FROM_EXEUTIL);
       dmINTERNAL_QUERY_FROM_EXEUTIL);

     // On first ustat statement of session, allocate and fill the static hash
     // table of table-specific elapsed-time thresholds.
     if (!jitLogThresholdHash)
       {
         jitLogThresholdHash = new(CTXTHEAP) JitLogHashType(hashString, 77, TRUE, CTXTHEAP);
         initJITLogData();
       }

     performISForMC_ = FALSE;
   }

 HSGlobalsClass::~HSGlobalsClass()
 {
   // If this was an IUS execution, make sure the row for the source table in
   // SB_PERSISTENT_SAMPLES is modified to reflect that an IUS operation is no
   // longer in progress.
   if (PSRowUpdated)
     end_IUS_work();

   // Used in end_IUS_work(), must call it first.
   NADELETEBASIC(PST_IUSrequestedSampleRows_, STMTHEAP);
   NADELETEBASIC(PST_IUSactualSampleRows_, STMTHEAP);

   // reset the parser flags that were set in the constructor
   SQL_EXEC_ResetParserFlagsForExSqlComp_Internal(savedParserFlags);

   HSColGroupStruct *group = singleGroup;
   while (group)
   {
     if (group->groupHist) delete group->groupHist; // Delete single object from ColHeap
     group = group->next;
   }
   group = multiGroup;
   while (group)
   {
     if (group->groupHist) delete group->groupHist; // Delete single object from ColHeap
     group = group->next;
   }

   // If just-in-time logging was activated, turn logging back off.
   if (jitLogOn)
     {
       HSLogMan* LM = HSLogMan::Instance();
       sprintf(LM->msg, "***** End of Just-In-Time logging session for table %s *****\n",
               user_table->data());
       LM->Log(LM->msg);
       LM->StopLog();
     }

   // Must delete the context last in the destructor.
   DeleteHSContext(contID_);
 }


 // -----------------------------------------------------------------------
 // Initialize stats schema for Hive or native HBase tables if needed
 // -----------------------------------------------------------------------
 Lng32 HSGlobalsClass::InitializeStatsSchema()
   {
     Lng32 retcode = 0;

                                               /*==============================*/
                                               /*   CREATE HIVE STATS SCHEMA   */
                                               /*==============================*/
     if (isHiveCat(objDef->getCatName()))
       {
         HSTranMan *TM = HSTranMan::Instance(); // Must have transaction around this.
         TM->Begin("Create schema for hive stats.");
         NAString ddl = "CREATE SCHEMA IF NOT EXISTS ";
         ddl.append(HIVE_STATS_CATALOG).append('.').append(HIVE_STATS_SCHEMA).
             append(" AUTHORIZATION DB__ROOT");
         retcode = HSFuncExecQuery(ddl, -UERR_INTERNAL_ERROR, NULL,
                                   "Creating schema for Hive statistics", NULL,
                                   NULL);
         HSHandleError(retcode);
         TM->Commit(); // In case if there is an error, the commit will log the error (if
                       // ULOG is enabled. Otherwise, the method will commit the tranaction.
       }
                                               /*=====================================*/
                                               /*   CREATE HBASE STATS SCHEMA         */
                                               /*   typically as trafodion.hbasestats */
                                               /*=====================================*/
     if (isNativeHbaseCat(objDef->getCatName()))
       {
         HSTranMan *TM = HSTranMan::Instance(); // Must have transaction around this.
         TM->Begin("Create schema for native hbase stats.");
         NAString ddl = "CREATE SCHEMA IF NOT EXISTS ";
         ddl.append(HBASE_STATS_CATALOG).append('.').append(HBASE_STATS_SCHEMA).
             append(" AUTHORIZATION DB__ROOT");
         retcode = HSFuncExecQuery(ddl, -UERR_INTERNAL_ERROR, NULL,
                                   "Creating schema for native HBase statistics", NULL,
                                   NULL);
         HSHandleError(retcode);
         TM->Commit(); // In case if there is an error, the commit will log the error (if
                       // ULOG is enabled. Otherwise, the method will commit the tranaction.
       }
                                               /*==============================*/
                                               /*    CREATE HISTOGRM TABLES    */
                                               /*==============================*/
     retcode = CreateHistTables(this);
     HSHandleError(retcode);

     return retcode;
   }


 // -----------------------------------------------------------------------
 //
 // -----------------------------------------------------------------------
 Lng32 HSGlobalsClass::Initialize()
   {
     Lng32 retcode = 0;
     NAString query;
     HSCursor cursor;
     Int64 xSampleSet = 0;
     char intStr[30], intStr2[30];
     Int64 inserts, deletes, updates;
     HSLogMan *LM = HSLogMan::Instance();
     HSGlobalsClass *hs_globals = GetHSContext();

     // Seed the random number generator used in quicksort().
     srand(time(NULL));
     // Set the default catalog names for Hive and HBase.
     if (defaultHiveCatName == NULL)
        defaultHiveCatName = new (GetCliGlobals()->exCollHeap()) NAString("");
     else
       (*defaultHiveCatName) = "";

     CmpCommon::getDefault(HIVE_CATALOG, (*defaultHiveCatName), FALSE);
     (*defaultHiveCatName).toUpper();

     if (defaultHbaseCatName == NULL)
        defaultHbaseCatName = new (GetCliGlobals()->exCollHeap()) NAString("");
     else
       (*defaultHbaseCatName) = "";

     CmpCommon::getDefault(HBASE_CATALOG, (*defaultHbaseCatName), FALSE);
     (*defaultHbaseCatName).toUpper();

     // initialize stats schema if this is a Hive or native HBase table
     retcode = InitializeStatsSchema();
     HSHandleError(retcode);

                                              /*==============================*/
                                              /*   CREATE UNDOCUMENTED VIEW   */
                                              /*==============================*/
     if (optFlags & VIEWONLY_OPT)
       {
         if (isHiveCat(objDef->getCatName()))
           {
             *CmpCommon::diags() << DgSqlCode(-UERR_NO_VIEWONLY) << DgString0("hive");
             return -1;
           }
         else if (isHbaseCat(objDef->getCatName()))
           {
             *CmpCommon::diags() << DgSqlCode(-UERR_NO_VIEWONLY) << DgString0("HBase");
             return -1;
           }
         LM->Log("\tCREATE HISTOGRAM VIEW");
         retcode = CreateHistView(this);
         HSHandleError(retcode);
         return 0;
       }
                                               /*==============================*/
                                               /*=      DETERMINE #ROWS       =*/
                                               /*==============================*/
     Int64 userSuppliedRowCount = actualRowCount;

     sample_I_generated = FALSE;

     LM->StartTimer("getRowCount()");
     Int32 errorCode = 0;
     Int32 breadCrumb = 0;
     if ((optFlags & ROWCOUNT_OPT) == 0)
       actualRowCount = objDef->getRowCount(currentRowCountIsEstimate_,
                                            inserts, deletes, updates,
                                            numPartitions,
                                            minRowCtPerPartition_,
                                            errorCode /* out */,
                                            breadCrumb /* out */,
                                            optFlags & (SAMPLE_REQUESTED | IUS_OPT));
     else
       {
         // skip the potentially expensive step of determining the row
         // count, if it was specified by the user
         actualRowCount = userSuppliedRowCount;
         currentRowCountIsEstimate_ = FALSE;
         inserts = deletes = updates = 0;
         numPartitions = 1;
         minRowCtPerPartition_ = actualRowCount;
         if (LM->LogNeeded())
           {
             convertInt64ToAscii(actualRowCount, intStr);
             sprintf(LM->msg, "\t\t\tUser provided rowcount: rows=%s", intStr);
             LM->Log(LM->msg);
           }
       }
     LM->StopTimer();
     if (LM->LogNeeded())
       {
         sprintf(LM->msg, "\tcurrentRowCountIsEstimate_=%d from getRowCount()", currentRowCountIsEstimate_);
         LM->Log(LM->msg);
         sprintf(LM->msg, "\terrorCode=%d, breadCrumb=%d", errorCode, breadCrumb);
         LM->Log(LM->msg);
         if (errorCode == HBC_ERROR_ROWCOUNT_EST_EXCEPTION)
           {
             const char * jniErrorStr = HSFuncGetJniErrorStr();
             if (strlen(jniErrorStr) > 0)
               {
                 LM->Log("\tJNI exception info:");
                 LM->Log(jniErrorStr);
               }
           }
       }

     if (errorCode == HBC_ERROR_ROWCOUNT_EST_EXCEPTION)
       {
         *CmpCommon::diags() << DgSqlCode(-UERR_BAD_EST_ROWCOUNT) << DgInt0(errorCode)
                             << DgInt1(breadCrumb) << DgString0(HSFuncGetJniErrorStr());
         return -1;
       }
     else if (errorCode)
       {
         *CmpCommon::diags() << DgSqlCode(-UERR_BAD_EST_ROWCOUNT) << DgInt0(errorCode)
                             << DgInt1(breadCrumb) << DgString0("");
         return -1;
       }

     // We only allow an estimate when sampling, and then only if the
     // estimated row count is at least ustat_min_estimate_for_rowcount (CQD),
     // because estimation error is high for small or fragmented tables.
     // Otherwise a SELECT COUNT(*) is used to get the actual row count in
     // place of the estimate, unless the user supplied his own row count..
     if (currentRowCountIsEstimate_ && !(optFlags & CLEAR_OPT))
       {
         if (convertInt64ToDouble(actualRowCount) <   // may be 0 (no estimate) or -1 (error doing estimation)
                      CmpCommon::getDefaultNumeric(USTAT_MIN_ESTIMATE_FOR_ROWCOUNT))
           {
             if (LM->LogNeeded() && actualRowCount > 0)
             {
               sprintf(LM->msg, "Estimated row count " PF64 " rejected (below size threshhold).",
                       actualRowCount);
               LM->Log(LM->msg);
             }
             LM->StartTimer("Execute query to get row count");
             query  = "SELECT COUNT(*) FROM ";
             query += getTableName(user_table->data(), nameSpace);
             query += " FOR READ UNCOMMITTED ACCESS";
             retcode = cursor.fetchNumColumn(query, NULL, &actualRowCount);
             LM->StopTimer();
             HSHandleError(retcode);
             currentRowCountIsEstimate_ = FALSE;
             if (LM->LogNeeded())
               {
                 convertInt64ToAscii(actualRowCount, intStr);
                 sprintf(LM->msg, "\n\t\tUsing select count(*): rows=%s", intStr);
                 LM->Log(LM->msg);
               }
           }
       }
     else  // row change counts won't be accurate if estimation was used
       rowChangeCount = inserts + deletes + updates;

     HS_ASSERT(actualRowCount >= 0);

     Int64 youWillLikelyBeSorry =
       ActiveSchemaDB()->getDefaults().getAsDouble(USTAT_YOULL_LIKELY_BE_SORRY);
     if ((actualRowCount >= youWillLikelyBeSorry) &&
        !(optFlags & CLEAR_OPT) &&
        !(optFlags & SAMPLE_REQUESTED) &&
        !(optFlags & IUS_OPT))
       {
         // attempt to do UPDATE STATISTICS on a big table without sampling,
         // which could take a really long time
         if ((optFlags & NO_SAMPLE) == 0)  // if explicit NO SAMPLE is missing
           {
             // raise an error on the chance that omitting the SAMPLE clause
             // was accidental
             HSFuncMergeDiags(-UERR_YOU_WILL_LIKELY_BE_SORRY);
             retcode = -1;
             HSHandleError(retcode);
           }
       }


                                              /*===================================*/
                                              /*=  DETERMINE "NECESSARY" COLUMNS  =*/
                                              /*===================================*/

     // Determining which columns are implied by the NECESSARY clause, if present,
     // has to be deferred until this point so the row count is known (determined
     // immediately above). The row count is used in determining obsolescence of
     // existing histograms.
     if (optFlags & NECESSARY_OPT)
       {
         // Sporadic failures of AddNecessaryColumns() have been observed,
         // so we use retry-with-delay to try to circumvent the problem. Note that
         // the retry limit is specified by a CQD that is specific to this fn rather
         // than the one used for other ustat retries, although the same delay is used.
         Int32 centiSecsDelay = getDefaultAsLong(USTAT_RETRY_DELAY);
         Int32 retryLimit = getDefaultAsLong(USTAT_RETRY_NEC_COLS_LIMIT);

         // Save the state of the group lists, so they can be restored in between retries.
         HSColGroupStruct* oldSingleGroup = singleGroup;
         HSColGroupStruct* oldMultiGroup = multiGroup;
         Lng32 oldGroupCount = groupCount;

         ComDiagsArea& diagsArea = hs_globals->diagsArea;
         Lng32 diagMark = diagsArea.mark();

         NABoolean groupStateOK = TRUE;
         Int32 retry;
         for (retry = 0; retry <= retryLimit && groupStateOK; retry++)
           {
             retcode = AddNecessaryColumns();
             if (retcode < 0)
               {
                 // An error occurred.
                 if (retry < retryLimit &&
                     (groupStateOK = removeGroups(groupCount - oldGroupCount,  // intentional = (not ==)
                                                  oldSingleGroup, oldMultiGroup)))
                   {
                     DELAY_CSEC(centiSecsDelay); // short delay before next attempt
                     if (LM->LogNeeded())
                       LM->Log("!!! Retrying AddNecessaryColumns() !!!");
                   }
               }
             else
               break;  // successful execution, exit retry loop
           }

         // If we found errors, but retried successfully, rewind past the errors
         // on the failed attempts. Otherwise, error messages will be displayed
         // in spite of successful execution.
         if (retry > 0 && retcode >= 0)
           diagsArea.rewind(diagMark, TRUE);
         if (retcode == HS_EOF)
           retcode = 0;
         HSHandleError(retcode);
       }

                                              /*=============================*/
                                              /*=  SEE IF NEW STATS NEEDED  =*/
                                              /*=============================*/
     // This can't be done before checking columns implied by NECESSARY (above).
     if (groupCount == 0 ||                        // no grouplist specified
         (optFlags & CLEAR_OPT))                   // CLEAR option used
       {
         statsNeeded_ = FALSE;                     // do not collect statistics
         return 0;
       }
                                              /*==============================*/
                                              /*=  DETERMINE SAMPLING RATIO  =*/
                                              /*==============================*/
     if (actualRowCount > 0)
       {
         if (CmpCommon::getDefault(USTAT_USE_BACKING_SAMPLE) == DF_ON)
           {
             if (optFlags & SAMPLE_REQUESTED)
               LM->Log("SAMPLE OPTION IGNORED. USING BACKING SAMPLE TABLE.");
             sampleRowCount = actualRowCount / 100;  //@ZXtemp -- assume 1% sample for now
           }
         //If the number of rows are less than the minimum for which we do sampling,
         //then ignore the sampling option. The table is small enough for a full
         //table scan. We make an exception for this if a persistent sample (IUS)
         //was requested.
         else if ((optFlags & SAMPLE_REQUESTED) &&
             !(optFlags & IUS_PERSIST) &&
             (convertInt64ToDouble(actualRowCount) < getMinRowCountForSample()))
           {
             LM->Log("SAMPLE OPTION IGNORED. SMALL TABLE SPECIFIED");
           }
         else
           {
             // Set sample size and percent based on sampling options.
             switch (optFlags & SAMPLE_REQUESTED)
               {
                 case SAMPLE_BASIC_0:         /*==    BASIC: NO OPTIONS     ==*/
                   // READ BASE TABLE USING A RATIO OF HIST_DEFAULT_SAMPLE_RATIO
                   // BUT NO MORE THAN HIST_DEFAULT_SAMPLE_MAX ROWS.  THE MIN SIZE
                   // OF SAMPLE IS THE NUMBER OF PARTITIONS OF THE TABLE * 2.
                 {
                   if (CmpCommon::getDefault(USTAT_USE_SLIDING_SAMPLE_RATIO) == DF_ON)
                     xSampleSet = getDefaultSlidingSampleSize(actualRowCount);
                   else
                     xSampleSet = getDefaultSampleSize(actualRowCount);
                   xSampleSet = MAXOF(xSampleSet, numPartitions*2);

                   // multiply by 100.0001 instead of 100 so that rounding
                   // errors are limited.
                   sampleTblPercent = convertInt64ToDouble(xSampleSet) /
                                      convertInt64ToDouble(actualRowCount) * 100.0001;

                   if (sampleTblPercent < 100.0)
                     {
                        sampleOptionUsed = TRUE;
                        sampleRowCount = xSampleSet;
                     }
                   break;
                 }

                 case SAMPLE_BASIC_1:         /*==     BASIC: ROWS ONLY     ==*/
                 // READ NUMBER OF ROWS SPECIFIED - NO MORE THAN ACTUAL ROWCOUNT
                 {
                   if ((optFlags & ROWCOUNT_OPT) &&
                       (sampleValue1 > actualRowCount))
                     {
                       HSFuncMergeDiags(- UERR_INVALID_OPTION,
                                        "SAMPLE ROWS",
                                        "less than or equal to ROWCOUNT");
                       retcode = -1;
                       HSHandleError(retcode);
                     }
                   else
                     {
                       xSampleSet = MINOF(sampleValue1, actualRowCount);
                       // multiply by 100.0001 instead of 100 so that rounding
                       // errors are limited.
                       sampleTblPercent = convertInt64ToDouble(xSampleSet) /
                                          actualRowCount * 100.0001;

                       if (sampleTblPercent < 100)
                         {
                           sampleOptionUsed = TRUE;

                           //estimate the number of rows in the temporary sample
                           //table. This will help determine how many partitions
                           //to create for MX tables.
                           sampleRowCount = xSampleSet;
                         }
                     }
                   break;
                 }

                 case SAMPLE_RAND_1:          /*==     RANDOM: PERCENT      ==*/
                 {
                   sampleTblPercent = convertInt64ToDouble(sampleValue1) / 10000;
                   xSampleSet = (Int64)ceil(convertInt64ToDouble(actualRowCount) *
                                sampleTblPercent / 100) ;

                   if (sampleTblPercent < 100 || (optFlags & IUS_PERSIST))
                     {
                       sampleOptionUsed = TRUE;

                       //estimate the number of rows in the temporary sample
                       //table. This will help determine how many partitions
                       //to create for MX tables.
                       sampleRowCount = xSampleSet;
                     }
                   break;
                 }

                 case SAMPLE_RAND_2:          /*==RANDOM: PERCENT w/ CLUSTER==*/
                 {
                   sampleTblPercent = convertInt64ToDouble(sampleValue1) / 10000;
                   xSampleSet = (Int64)ceil(convertInt64ToDouble(actualRowCount) *
                                sampleTblPercent / 100) ;
                   if (sampleTblPercent < 100)
                     {
                       sampleOptionUsed = TRUE;

                       //estimate the number of rows in the temporary sample
                       //table. This will help determine how many partitions
                       //to create for MX tables.
                       sampleRowCount = xSampleSet;
                     }
                   break;
                 }

                 case SAMPLE_PERIODIC:        /*==        PERIODIC          ==*/
                 {
                   if ((optFlags & ROWCOUNT_OPT) &&
                       (sampleValue2 > actualRowCount))
                     {
                       HSFuncMergeDiags(- UERR_INVALID_OPTION,
                                        "SAMPLE PERIOD",
                                        "less than or equal to ROWCOUNT");
                       retcode = -1;
                       HSHandleError(retcode);
                     }
                   else
                     {
                       sampleOptionUsed = TRUE;

                       //estimate the number of rows in the temporary sample
                       //table. This will help determine how many partitions
                       //to create for MX tables.
                       sampleRowCount = (Int64)ceil((1-((double)(sampleValue2 - sampleValue1) / (double)sampleValue2)) * (double)actualRowCount);
                     }
                   break;
                 }

                 default:                             /*== NO SAMPLING: 100%==*/
                 {
                 }
               }
             if (sampleTblPercent > 100) sampleTblPercent=100;
             if (sampleTblPercent < 0)   sampleTblPercent=0;
             samplePercentX100 = (short) (sampleTblPercent * 100);
                // saved for automation: percent * 100.
           }
       }
     else
       {                                           /* empty table             */
         statsNeeded_ = FALSE;                     /*do not collect statistics*/
       }

                                              /*==============================*/
                                              /*=    DETERMINE #INTERVALS    =*/
                                              /*==============================*/
     if (optFlags & INTERVAL_OPT)
       {
         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "\t\tUSER REQUESTED %d INTERVALS", intCount);
             LM->Log(LM->msg);
           }
       }
     else
       {
         intCount = MAXOF(CmpCommon::getDefaultLong(HIST_DEFAULT_NUMBER_OF_INTERVALS), 1);
         intCount = MINOF(intCount, HS_MAX_INTERVAL_COUNT);

         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "\t\tSYSTEM GENERATED %d INTERVALS", intCount);
             LM->Log(LM->msg);
           }
       }

     if (LM->LogNeeded())
       {
         convertInt64ToAscii(actualRowCount, intStr);
         convertInt64ToAscii(sampleRowCount, intStr2);
         sprintf(LM->msg, "\tTotal #rows= %s, sample rows=%s, IntervalCount = %d",
                 intStr, intStr2, intCount);
         LM->Log(LM->msg);
       }


     return 0;
   }

 // *****************************************************************************
 // *                                                                           *
 // * Function: HSGlobalsClass::isAuthorized                                    *
 // *                                                                           *
 // *   This member function determines if a user has authority to perform:     *
 // *     UPDATE STATISTICS or                                                  *
 // *     SHOWSTATS                                                             *
 // *                                                                           *
 // *****************************************************************************
 // *                                                                           *
 // *  Parameters:                                                              *
 // *                                                                           *
 // *  <isShowStats>                NABoolean                          In       *
 // *    TRUE if request is coming from a showstat request                      *
 // *                                                                           *
 // * returns:                                                                  *
 // *   TRUE - current user has privilege                                       *
 // *   FALSE - current user has no privilege or unexpected error occurred      *
 // *                                                                           *
 // * ComDiags is loaded with any unexpected errors                             *
 // *                                                                           *
 // *****************************************************************************
 NABoolean HSGlobalsClass::isAuthorized(NABoolean isShowStats)
 {
   if (!CmpCommon::context()->isAuthorizationEnabled())
     return TRUE;

   HS_ASSERT (objDef->getNATable());

   // Let keep track of how long authorization takes
   HSLogMan *LM = HSLogMan::Instance();
   LM->LogTimeDiff("Entering: HSGlobalsClass::isAuthorized");

   // Root user is authorized for all operations.
   NABoolean authorized = ComUser::isRootUserID();

   // If the current user owns the target object, they have full DDL authority
   // on the object.
   if (!authorized)
     {
       if (LM->LogNeeded())
         {
           sprintf(LM->msg, "Authorization: check for object owner");
           LM->Log(LM->msg);
         }

       Int32 objOwner = objDef->getNATable()->getOwner();
       authorized = (ComUser::getCurrentUser() == objDef->getNATable()->getOwner());
     }

   // See if user has MANAGE_STATISTICS component priv
   NAString privMgrMDLoc =
          NAString(CmpSeabaseDDL::getSystemCatalogStatic()) +
          NAString(".\"") +
          NAString(SEABASE_PRIVMGR_SCHEMA) +
          NAString("\"");
   PrivMgrComponentPrivileges componentPrivileges(std::string(privMgrMDLoc.data()),&diagsArea);

   if (!authorized)
     {

       if (LM->LogNeeded())
         {
           sprintf(LM->msg, "Authorization: check for MANAGE_STATISTICS component privilege");
           LM->Log(LM->msg);
         }

       authorized = (componentPrivileges.hasSQLPriv(ComUser::getCurrentUser(),
                                                    SQLOperation::MANAGE_STATISTICS,
                                                    true));
     }

   // For SHOW STATS command, check for additional privileges
   if (!authorized && isShowStats)
     {
       // check for SHOW component privilege
       if (LM->LogNeeded())
         {
           sprintf(LM->msg, "Authorization: check for SHOW component privilege");
           LM->Log(LM->msg);
         }

       authorized = (componentPrivileges.hasSQLPriv(ComUser::getCurrentUser(),
                                                     SQLOperation::SHOW,
                                                     true));
      }

    // Allow operation if requester has SELECT priv
    if (!authorized)
      {
        if (LM->LogNeeded())
          {
            sprintf(LM->msg, "Authorization: check for SELECT object privilege");
            LM->Log(LM->msg);
          }

        // check for SELECT privilege
        PrivMgrUserPrivs *privs = objDef->getNATable()->getPrivInfo();
        if (privs == NULL)
          {
            *CmpCommon::diags() << DgSqlCode(-1034);
             authorized = FALSE;
          }

        // Requester must have at least select privilege
        else if ( privs->hasSelectPriv() )
          authorized = TRUE;
        else
          {
            *CmpCommon::diags()
             << DgSqlCode( -4481 )
             << DgString0( "SELECT or MANAGE_STATISTICS" )
             << DgString1( objDef->getNATable()->getTableName().getQualifiedNameAsAnsiString() );
             authorized = FALSE;
           }
        }

    LM->LogTimeDiff("Exiting: HSGlobalsClass::isAuthorized");
    return authorized;
 }

 // Read the file, if present, containing table names and their execution
 // elapsed time thresholds. Store the thresholds in a hash table keyed by
 // the fully-qualified table names.
 void HSGlobalsClass::initJITLogData()
 {
   char* sqroot = getenv("TRAF_HOME");
   if (!sqroot)
     return;

   NAString filePath = sqroot;
   filePath.append("/logs/jit_ulog_params");
   FILE* jitParamFile = fopen(filePath.data(), "r");
   if (!jitParamFile)
     return;

   char buf[200];
   double* elapsedTimeThresholdPtr;
   Int32 itemsScanned = 0;
   while (itemsScanned != EOF)
     {
       // Use context heap instead of statement heap for keys and values placed
       // in the hash table. It is used across multiple statements.
       elapsedTimeThresholdPtr = new(CTXTHEAP) double;
       itemsScanned = fscanf(jitParamFile, "%s %lf\n",
                             buf, elapsedTimeThresholdPtr);
       if (itemsScanned == 2)
         {
           jitLogThresholdHash->insert(new(CTXTHEAP) NAString(buf),
                                       elapsedTimeThresholdPtr);
         }
       else
         {
           NADELETEBASIC(elapsedTimeThresholdPtr, CTXTHEAP);
         }
     }

 #if 0
   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
     {
       NAHashDictionaryIterator<NAString, double> iter(*jitLogThresholdHash);
       NAString* key;
       double* value;
       for (CollIndex i=0; i<iter.entries(); i++)
         {
           iter.getNext(key, value);
           sprintf(LM->msg, "Threshold for %s is %f", key->data(), *value);
           LM->Log(LM->msg);
         }
     }
 #endif
 }

 // Activate logging in response to detecting that the ustat statement has run
 // far longer than expected for the source table.
 // Params:
 //   checkPointName -- text describing the point at which the logging is kicking in.
 //   elapsedSeconds -- time the stmt had been running when logging was activated.
 void HSGlobalsClass::startJitLogging(const char* checkPointName, Int64 elapsedSeconds)
 {
   HSLogMan *LM = HSLogMan::Instance();

   // Turn logging on
   LM->StartLog(TRUE);
   jitLogOn = TRUE;

   // Write introductory information to log; name of table and columns being
   // processed, elapsed time and the threshold value it exceeded, name describing
   // the point at which logging was activated.
   sprintf(LM->msg,
           "***** Just-In-Time logging activated due to excessive elapsed time for table %s *****",
           user_table->data());
   LM->Log(LM->msg);
   sprintf(LM->msg, "Activation triggered %s", checkPointName);
   LM->Log(LM->msg);
   sprintf(LM->msg, "Current elapsed time = " PF64 " seconds", elapsedSeconds);
   LM->Log(LM->msg);
   sprintf(LM->msg, "Tolerance threshold = " PF64 " seconds", (Int64)jitLogThreshold);
   LM->Log(LM->msg);
   LM->Log("Column groups being processed:");
   HSColGroupStruct* group = singleGroup;
   while (group)
     {
       sprintf(LM->msg, "    %s (%s)",
               group->colNames->data(), SortStateName[group->state]);
       LM->Log(LM->msg);
       group = group->next;
     }
   group = multiGroup;
   while (group)
     {
       sprintf(LM->msg, "    %s", group->colNames->data());
       LM->Log(LM->msg);
       group = group->next;
     }
 }

 // The optimal degree of parallelism for a LOAD or UPSERT is the number of
 // partitions of the source table. This forces that by setting the cqd
 // PARALLEL_NUM_ESPS. Note that when the default for AGGRESSIVE_ESP_ALLOCATION_PER_CORE
 // is permanently changed to 'ON', we may be able to remove this.
 // tblDef -- ptr to HSTableDef from which to get the catalog and schema name of
 //           the source table.
 // tblName -- unqualified name of the source table. If NULL, then the source
 //            table is the one represented by tblDef.
 // Returns TRUE if the cqd was successfully set, FALSE otherwise. If TRUE is returned,
 // then resetEspParallelism() may be called to reset the cqd.
 static NABoolean setEspParallelism(HSTableDef* tblDef, const char* tblName = NULL)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Lng32 retcode = 0;
   Lng32 numPartitions = 0;
   if (!tblName)
     numPartitions = tblDef->getNumPartitions();
   else
     {
       HSCursor cursor;
       NAString numPartitionsQuery;
       numPartitionsQuery.append("select t.num_salt_partns from \"_MD_\".OBJECTS O, \"_MD_\".TABLES T where o.catalog_name = '")
                         .append(tblDef->getCatName())
                         .append("' and o.schema_name = '")
                         .append(tblDef->getSchemaName())
                         .append("' and o.object_name = '")
                         .append(tblName)
                         .append("' and o.object_uid = t.table_uid");
       retcode = cursor.fetchNumColumn(numPartitionsQuery, &numPartitions, NULL);
       if (retcode != 0)
         {
           if (LM->LogNeeded())
             {
               sprintf(LM->msg, "PARALLEL_NUM_ESPS not set; query to get # partitions received sqlcode %d", retcode);
               LM->Log(LM->msg);
             }
           return FALSE;
         }
     }

   NABoolean espCQDUsed = FALSE;
   if (numPartitions > 1)
     {
       char temp[25];
       sprintf(temp, "'%d'", numPartitions);
       NAString espsCQD = "CONTROL QUERY DEFAULT PARALLEL_NUM_ESPS ";
       espsCQD += temp;
       retcode = HSFuncExecQuery(espsCQD);
       if (retcode < 0)
         {
           if (LM->LogNeeded())
             {
               sprintf(LM->msg, "cqd statement to set PARALLEL_NUM_ESPS failed with %d", retcode);
               LM->Log(LM->msg);
             }
         }
       else
         {
           espCQDUsed = TRUE;
           if (LM->LogNeeded())
             {
               sprintf(LM->msg, "PARALLEL_NUM_ESPS was set to %d", numPartitions);
               LM->Log(LM->msg);
             }
         }
     }
   else if (LM->LogNeeded())
     {
       sprintf(LM->msg, "PARALLEL_NUM_ESPS not set; # partitions reported as %d", numPartitions);
       LM->Log(LM->msg);
     }

   return espCQDUsed;
 }

 // Reset the cqd PARALLEL_NUM_ESPS. This is the other bookend for setEspParallelism(),
 // which returns TRUE if the cqd is actually set within that function. If so, this
 // function can be called to reset it.
 static void resetEspParallelism()
 {
   HSLogMan *LM = HSLogMan::Instance();
   Lng32 retcode = HSFuncExecQuery("CONTROL QUERY DEFAULT PARALLEL_NUM_ESPS RESET");
   if (retcode)
   {
     HSLogMan *LM = HSLogMan::Instance();
     if (LM->LogNeeded())
       {
         HSLogMan *LM = HSLogMan::Instance();
         sprintf(LM->msg, "cqd statement to reset PARALLEL_NUM_ESPS returned %d", retcode);
         LM->Log(LM->msg);
       }
   }
 }

 /**********************************************/
 /* METHOD: createSampleOption()               */
 /* PURPOSE: create text for use in sample     */
 /*          query.                            */
 /* INPUT:  sampleType - the sample option type*/
 /*           set when parsing.                */
 /*         samplePercent - %                  */
 /*         sampleValue1, sampleValue2 - set   */
 /*          when parsing.                     */
 /* OUTPUT: sampleOpt - sample text option.    */
 /**********************************************/
 void createSampleOption(Lng32 sampleType, double samplePercent, NAString &sampleOpt,
                         Int64 sampleValue1, Int64 sampleValue2)
 {
   char floatStr[30], intStr[30];
   switch (sampleType)
   {
     case SAMPLE_BASIC_0:         // BASIC: NO OPTIONS
     case SAMPLE_BASIC_1:         // BASIC: ROWS ONLY
     case SAMPLE_RAND_1:          // RANDOM: PERCENT
     case SAMPLE_RAND_2:          // RANDOM: PERCENT W/ CLUSTERS
       sprintf(floatStr, "%f", samplePercent);
       sampleOpt  = " SAMPLE RANDOM ";
       sampleOpt += floatStr;
       sampleOpt += " PERCENT ";

       if (sampleType == SAMPLE_RAND_2 && sampleValue2 > 0) // Only set for SAMPLE_RAND_2
       {
         convertInt64ToAscii(sampleValue2, intStr);
         sampleOpt += " CLUSTERS OF ";
         sampleOpt += intStr;
         sampleOpt += " BLOCKS ";
       }
       break;

     case SAMPLE_PERIODIC:        // PERIODIC
       sampleOpt  = " SAMPLE PERIODIC ";
       convertInt64ToAscii(sampleValue1, intStr);
       sampleOpt += intStr;
       sampleOpt += " ROWS EVERY ";
       convertInt64ToAscii(sampleValue2, intStr);
       sampleOpt += intStr;
       sampleOpt += " ROWS ";
       break;
     default:                     // Invalid option.
       HS_ASSERT(FALSE);
       break;
   }
 }


 /***********************************************/
 /* METHOD:  HSSample makeTableName() member    */
 /* PURPOSE: Creates a unique sample table name */
 /*          based on source table UID and      */
 /*          current time.                      */
 /* RETCODE: none.                              */
 /***********************************************/
 void HSSample::makeTableName(NABoolean isPersSample)
 {
   NABoolean unpartitionedSample = FALSE;
   if (objDef->getObjectFormat() == SQLMX)
   {
     //The naming convention used for the temporary sample table is 'TRAF_SAMPLE_'
     //followed by the object_uid of the source table and a portion of
     //the timestamp. The object_uid ensures no collisions with update stats
     //for other tables, while the timestamp chars help avoid collision when
     //two update stats are running against same table.

     // Check for PUBLIC_ACCESS_SCHEMA.  This should always exist, except
     // on NT.  So, we go ahead and create it on NT.  If for some reason
     // it does not, then the current schema will be used.
     // If the sample table is for a volatile table, then create it in the
     // same volatile schema as the source table instead of in the
     // public_access_schema.
     // Creating it in the volatile schema will make it a volatile table
     // and all attributes of a volatile table, like nullable primary keys,
     // will apply.
     // There are no security issues with a volatile table since it is
     // always created by the same user and in the same session where the
     // update statistics command is being issued from.
     // For SeaQuest (COM_VERSION >= 2500), the PUBLIC_ACCESS_SCHEMA is guaranteed
     // to exist, so don't call the function that checks for it.
     if (!objDef->isVolatile() && objDef->publicSchemaExists())
     {
       sampleTable  = objDef->getCatName(HSTableDef::EXTERNAL_FORMAT);
       sampleTable += ".PUBLIC_ACCESS_SCHEMA.";
     }
     else
     {
       sampleTable  = objDef->getHistLoc(HSTableDef::EXTERNAL_FORMAT);
       sampleTable += ".";
     }

 #ifdef _DEBUG
     // If the cqd USTAT_SAMPLE_TABLE_NAME_CREATE has a value, use it as the
     // sample table's name instead of deriving one from the uid of the sampled
     // table and the current timestamp.
     NAString cqdSampleTableName;
     CmpCommon::getDefault(USTAT_SAMPLE_TABLE_NAME_CREATE, cqdSampleTableName, 0);
     if (!IsNAStringSpaceOrEmpty(cqdSampleTableName))
       {
         sampleTable += cqdSampleTableName;
         makeAccessible_ = TRUE;  // Avoid making offline and undroppable
       }
     else
 #endif  /* _DEBUG */
       {
         char objectIDStr[30]; // room for 64-bit integer (20 digits max)
         char zeroPaddedObjectIDStr[30];
         char timestampStr[20]; // room for _<10 digits>_<6 digits>
         NA_timeval tv;
         NA_gettimeofday(&tv, 0);

         // When constructing the sample table name, we use a fixed-length
         // representation to minimize non-determinism in test logs. (We
         // have found that variable-length representations are subject to
         // havoc with line-wrapping semantics; it's harder to filter out
         // non-determinism that wraps across lines.)

         // convert object UID to a fixed-length string (with leading zeroes)
         // note: object UIDs are always positive today
         convertInt64ToAscii(objDef->getObjectUID(), objectIDStr);
         strcpy(zeroPaddedObjectIDStr,"000000000000000000000000"); // 20 zeroes
         strcpy(zeroPaddedObjectIDStr+(20-strlen(objectIDStr)),objectIDStr);

         // use fixed length strings for the time stamp parts too
         sprintf(timestampStr, "_%010u_%06u", (UInt32)tv.tv_sec, (UInt32)tv.tv_usec);
         sampleTable += TRAF_SAMPLE_PREFIX;  // "TRAF_SAMPLE_"
         sampleTable += zeroPaddedObjectIDStr;
         sampleTable += timestampStr;

         // This is FALSE by default; we only set it here defensively in case it
         // is somehow set to TRUE elsewhere.
         makeAccessible_ = FALSE;
       }
   }
   else
   {
     //Allow user to specify volume location for temporary table
     //through HIST_SCRATCH_VOL.
     NAString userLocation = getTempTablePartitionInfo(unpartitionedSample, isPersSample);
     if (userLocation.isNull())
       sampleTable = objDef->getCatName();
     else
     {
       sampleTable  = objDef->getNodeName();
       sampleTable += ".";
       sampleTable += userLocation.data();
     }
     sampleTable += ".ZZMXTEMP.";
     sampleTable += objDef->getObjectName(HSTableDef::EXTERNAL_FORMAT);
   }
 }

 /***********************************************/
 /* METHOD:  HSSample make() member function    */
 /* PURPOSE: Create a sample table for          */
 /*          sampling purposes only. Histograms */
 /*          will be determined with data from  */
 /*          this table only.                   */
 /* OUTPUT:  sampleTableName - name of table.   */
 /*          tableRowCnt - only assigned if     */
 /*              rowCountIsEstimate = TRUE.     */
 /* INPUT:   sampleRowCnt - the size of the     */
 /*              sample table to create.        */
 /* RETCODE:  0 - successful                    */
 /*           non-zero otherwise                */
 /***********************************************/
 Lng32 HSSample::make(NABoolean rowCountIsEstimate, // input
                     NAString &sampleTableName,    // output
                     Int64 &tableRowCnt,           // output - rowcount of original table
                     Int64 &sampleRowCnt,          // input/output
                     NABoolean isPersSample,       // input. Default value is FALSE
                     NABoolean unpartitionedSample,// input. Default value is TRUE
                     Int64 minRowCtPerPartition
                    )
   {
     Lng32 retcode = 0;
     NAString dml, insertType, sampleOption;
     char intStr[30];
     NABoolean forceNoPartitioning = TRUE;

     HSTranMan *TM = HSTranMan::Instance();
     HSLogMan  *LM = HSLogMan::Instance();
     HSGlobalsClass *hs_globals = GetHSContext();

     LM->StartTimer("Create/populate sample table");
     (void)getTimeDiff(TRUE);

     NABoolean EspCQDUsed = FALSE;
     NABoolean HBaseCQDsUsed = FALSE;

     sampleRowCount = sampleRowCnt;  // Save sample row count for HSSample object.

     // Create sample option based on sampling type, using 'samplePercent'.
     if (hs_globals)
       createSampleOption(sampleType, samplePercent, sampleOption,
                          hs_globals->sampleValue1, hs_globals->sampleValue2);
     else
       createSampleOption(sampleType, samplePercent, sampleOption);

     //Normally, we want to create an AUDITED scratch table. Although, for
     //performance and TMF timeouts, we should use a NON-AUDITED table and
     //SIDETREE inserts.
     //The default is to use SIDETREE inserts, unless a transaction is active.
     //You could use cqd USTAT_USE_SIDETREE_INSERT to control this feature.
     //NOTE: before making changes to this code, you must consider TMF
     //AUTOABORT time. This process may take a long time and blow away
     //your transaction.
     LM->StartTimer("Create sample table");
     retcode = create(unpartitionedSample, isPersSample);
     LM->StopTimer();
     if (retcode == -HS_PKEY_FLOAT_ERROR) {
       // If creation of sample table fails with -1120, then the primary key
       // has a float datatype and can't be partitioned.  Turn off POS.
       LM->StartTimer("Create sample table (no partitions)");
       retcode = create(forceNoPartitioning, isPersSample);
       LM->StopTimer();
     }
     if (retcode) TM->Rollback();
     HSHandleError(retcode);

     if (LM->LogNeeded())
       {
         sprintf(LM->msg, "\tSAMPLE TABLE = %s", sampleTable.data());
         LM->Log(LM->msg);
       }

     // If a transaction is running then the table was created as audited and we
     // need to use a vanilla INSERT statement. Otherwise, we can use SIDETREE
     // INSERTS for better performance. A current bug in the HBase interface
     // requires the use of Upsert.
     // For Hive tables the sample table used is a Trafodion table
     if (hs_globals->isHbaseTable || hs_globals->isHiveTable)
       {
         EspCQDUsed = setEspParallelism(hs_globals->objDef);

         // Set CQDs controlling HBase cache size (number of rows returned by HBase
         // in a batch) to avoid scanner timeout. Reset these after the sample query
         // has executed.
         if (hs_globals->isHbaseTable)
           HBaseCQDsUsed = hs_globals->setHBaseCacheSize(samplePercent);

         if (CmpCommon::getDefault(TRAF_LOAD_USE_FOR_STATS) == DF_ON)
           {
             insertType = "LOAD WITH NO OUTPUT, NO RECOVERY, NO POPULATE INDEXES, NO DUPLICATE CHECK INTO ";
           }
         else
           {
             insertType = "UPSERT USING LOAD INTO ";
           }
       }
     else if (TM->InTransaction())
       insertType = "INSERT INTO ";
     else
       {
         insertType = "INSERT INTO ";
         //10-040706-7608: a workaround for this solution is to turn cqd    //Workaround: 10-040706-7608
         //PLAN_STEALING OFF. Once the compiler fixes their plan stealing   //Workaround: 10-040706-7608
         //plan issues, the cqd should be removed.                          //Workaround: 10-040706-7608
         HSFuncExecQuery("CONTROL QUERY DEFAULT PLAN_STEALING 'OFF'");      //Workaround: 10-040706-7608
       }

     // If the insert is trying to insert values into an IDENTITY
     // column which is a GENERATED ALWAYS AS IDENTITY column,
     // then a special override is required to allow the insertion
     // of values into the IDENTITY column.

     HSFuncExecQuery("CONTROL QUERY DEFAULT OVERRIDE_GENERATED_IDENTITY_VALUES 'ON'");

     dml  = insertType;
     dml += sampleTable;
     dml += " SELECT ";

     // Generate the select list. Truncate any over-long char/varchar columns
     // by using SUBSTRING calls. Omit any LOB columns.
     objDef->addTruncatedSelectList(dml);

     dml += " FROM ";

     NAString hiveSrc = CmpCommon::getDefaultString(USE_HIVE_SOURCE);
     if (! hiveSrc.isNull())
       {
         dml += "HIVE.HIVE.";
         dml += objDef->getObjectName();
         dml += hiveSrc;
       }
     else
       dml += getTableName(objDef->getObjectFullName(), objDef->getNameSpace());

     char cardHint[50];
     sprintf(cardHint, " <<+ cardinality %e >> ", (double)hs_globals->actualRowCount);
     dml += cardHint;

     dml += sampleOption;
     dml += " FOR READ UNCOMMITTED ACCESS";
     const Int32 hsALLOW_SPECIALTABLETYPE = 0x1;
     if (objDef->getNameSpace() == COM_IUD_LOG_TABLE_NAME)
     {
        SQL_EXEC_SetParserFlagsForExSqlComp_Internal(hsALLOW_SPECIALTABLETYPE);
     }

     // initialize sourceTableRowCount to -1. The method that sets this parameter
     // will not change this value if there is an error. So if
     // sourceTableRowCount = -1 after the call, we know something went wrong
     // and we do not use this value.
     Int64 sourceTableRowCount = -1;
     // on very busy system, some "update statistics" implementation steps like
     // "Process_Query" step in HSSample::make() that calls HSFuncExecQuery
     // may experience failures resulting in a flurry of callcatcher error 9200
     // events that show up in genesis solutions like 10-110320-6751, etc.
     // we suspect some of these errors may be transient
     // failures that may succeed if retried enough times.
     // 2 cqds allow user control of these retries.
     Int32 centiSecs = getDefaultAsLong(USTAT_RETRY_DELAY);
     Int32 limit = getDefaultAsLong(USTAT_RETRY_LIMIT);
     if (limit < 1 || centiSecs < 1) // user does not want any retry
     {
       LM->StartTimer("Populate sample table");
       retcode = HSFuncExecQuery(dml, - UERR_INTERNAL_ERROR, &sampleRowCount,
                                 HS_QUERY_ERROR, &sourceTableRowCount, objDef);
       LM->StopTimer();
     }
     else // user wants retry
     {
       // use AQR
       HSFuncExecQuery("CONTROL QUERY DEFAULT AUTO_QUERY_RETRY 'ON'");
       LM->StartTimer("Populate sample table (with possible retry)");
       retcode = HSFuncExecQuery(dml, - UERR_INTERNAL_ERROR, &sampleRowCount,
                                 HS_QUERY_ERROR, &sourceTableRowCount, objDef);
       LM->StopTimer();
       HSFuncExecQuery("CONTROL QUERY DEFAULT AUTO_QUERY_RETRY RESET");
     }

     sampleRowCnt = sampleRowCount;

     if (objDef->getNameSpace() == COM_IUD_LOG_TABLE_NAME)
     {
       SQL_EXEC_ResetParserFlagsForExSqlComp_Internal(hsALLOW_SPECIALTABLETYPE);
     }

     // On SQ, alter the sample table to audit afterwards. There are performance
     // issues with non-audited tables on SQ. For Trafodion, however, this alter
     // is not supported, so skip it.
      if (!hs_globals->isHbaseTable && !hs_globals->isHiveTable)
        {
          LM->StartTimer("Set audit attribute on sample table");
          SQL_EXEC_SetParserFlagsForExSqlComp_Internal(hsALLOW_SPECIALTABLETYPE);

          NAString alterStmt;
          alterStmt = "ALTER TABLE ";
          alterStmt += sampleTable;
          alterStmt += " attribute audit" ;
          retcode = HSFuncExecDDL(alterStmt,
                                  -UERR_GENERIC_ERROR,
                                  NULL,
                                  sampleTable, objDef);
          SQL_EXEC_ResetParserFlagsForExSqlComp_Internal(hsALLOW_SPECIALTABLETYPE);

          // Don't invoke HSHandleError, which returns if there was an error; need to
          // reset cqds below.
          if (retcode)
            {
              HSFilterWarning(retcode);
              HSFilterError(retcode);
            }
          LM->StopTimer();
        }

     // RESET CQDS:
     //10-040706-7608: a workaround for this solution is to turn cqd        //Workaround: 10-040706-7608
     //PLAN_STEALING OFF. Once the compiler fixes their plan stealing       //Workaround: 10-040706-7608
     //plan issues, the cqd should be removed.                              //Workaround: 10-040706-7608
     if (!(TM->InTransaction()))                                            //Workaround: 10-040706-7608
       HSFuncExecQuery("CONTROL QUERY DEFAULT PLAN_STEALING RESET");        //Workaround: 10-040706-7608

     HSFuncExecQuery("CONTROL QUERY DEFAULT POS RESET");
     HSFuncExecQuery("CONTROL QUERY DEFAULT POS_NUM_OF_PARTNS RESET");

     // Reset the IDENTITY column override CQD
     HSFuncExecQuery("CONTROL QUERY DEFAULT OVERRIDE_GENERATED_IDENTITY_VALUES RESET");

     if (HBaseCQDsUsed)
       {
         hs_globals->resetCQDs();
       }
     if (EspCQDUsed)
       {
         resetEspParallelism();
       }

     if (retcode) TM->Rollback();
     else         TM->Commit();

     if (LM->LogNeeded())
       {
         convertInt64ToAscii(sampleRowCount, intStr);
         sprintf(LM->msg, "\t\t\tSAMPLE TABLE SIZE = %s", intStr);
         LM->Log(LM->msg);
       }

     // TEMP: ignore empty sample set if bulk load is on as rowcount is currently not
     // being returned by bulk load.
     if ((sampleRowCount == 0) &&                    // sample set is empty;
         (CmpCommon::getDefault(USTAT_USE_BULK_LOAD) == DF_OFF))
       {                                          // cannot generate histograms
         drop();  // drop the sample table we created
         HSFuncMergeDiags(- UERR_SAMPLE_SET_IS_ZERO);
         retcode = -1;
       }
     else if (hs_globals)
       {
       hs_globals->sampleTableUsed = TRUE;
       hs_globals->samplingUsed    = TRUE;
       hs_globals->sampleSeconds   = getTimeDiff();
       // If (a) current row count is estimate (for R2.3 and later, this is unlikely)
       //    (b) user has not specified the rowcount and
       //    (c) we appear to get a meaningful rowcount for the source table
       //        (source table rowcount >= rows inserted into sample table) and
       //    (d) CLUSTER sampling not used
       // we set the actualRowCount to the value obtained from the statistics table
       // This works since every row of the source table is scanned for EID sampling
       // and the number of rows scanned is recorded in the stats area.
       if (rowCountIsEstimate &&
           !(hs_globals->optFlags & ROWCOUNT_OPT) &&
           (sourceTableRowCount > sampleRowCount) &&
           (hs_globals->optFlags & SAMPLE_REQUESTED) != SAMPLE_RAND_2)
         {
           tableRowCnt = sourceTableRowCount;
           if (LM->LogNeeded())
           {
             convertInt64ToAscii(tableRowCnt, intStr);
             sprintf(LM->msg, "\tThe actual row count from statistics = %s", intStr);
             LM->Log(LM->msg);
           }
         }
       }
     else if (rowCountIsEstimate && sourceTableRowCount > sampleRowCount) tableRowCnt = sourceTableRowCount;

     LM->StopTimer();

     sampleTableName = sampleTable;
     if (hs_globals)
       hs_globals->checkTime("after creating and populating sample table");
     return retcode;
   }


 /************************************************************************/
 /*                         MakeAllHistid()                              */
 /*                                                                      */
 /* FUNCTION:  Generates new unique histogram ids                        */
 /*            for histograms to be updated.  Performs:                  */
 /*              CURSOR103_... (via groupListFromTable() - reads from    */
 /*                            HISTOGRAM for SERIALIZABLE ACCESS.        */
 /*                                                                      */
 /*            For existing histograms,                                  */
 /*              the new ids will be generated by DualHistid(old id).    */
 /*                                                                      */
 /*            For a new histogram,                                      */
 /*              if there is any old histogram,                          */
 /*                its id will be the maximum id (prevHistId) + 5.       */
 /*              if there is no old histogram,                           */
 /*                its id is Julian Timestamp masked with 0x7FFFFFFF.    */
 /*                                                                      */
 /************************************************************************/
 Lng32 HSGlobalsClass::MakeAllHistid()
   {
     Lng32 retcode = 0;
     ULng32 prevHistId = 0;
     NABoolean processMultiGroups;
     NAString missingHistograms = "";

     HSColGroupStruct *tableGroupList;
     HSColGroupStruct *tableGroup;
     HSColGroupStruct *group;
     NABoolean msgTruncated = FALSE;
     HSLogMan *LM = HSLogMan::Instance();

     // Create a list of histograms that already exist for this table
     // in HISTOGRAMS table.  This function also puts any duplicates in
     // a list at HSGlobalsClass::dupGroup.
     retcode = groupListFromTable(tableGroupList, FALSE, TRUE /* exclusive mode */); // Reads from HISTOGRAMS table
     HSHandleError(retcode);
     if (LM->LogNeeded())
       {
         LM->Log("\t\tEXISTING HISTOGRAMS");
         if (tableGroupList != NULL)
           tableGroupList->print();
       }

     //PASS 1: Loop through histograms that already exist in HISTOGRAMS table.
     //        For those that are to be updated as part of this update stats,
     //        create new hist id by inverting LSB of old hist id.
     //        Any old histogram that is not being updated, add to list
     //        so it will be reported in warning to user.
     LM->StartTimer("Change hist ids of existing histograms");
     tableGroup = tableGroupList;
     while (tableGroup != NULL)
       {
         prevHistId = MAXOF(prevHistId, tableGroup->oldHistid);
         group = findGroup(tableGroup); // find histogram entry in command list.
         if (group == NULL)
           {
             if (missingHistograms.length() <= HS_MAX_MSGTOK_LEN)
               {
                 if  (tableGroup->reason != HS_REASON_EMPTY)
                   {
                     missingHistograms += "(";
                     missingHistograms += tableGroup->colNames->data();
                     missingHistograms += "),";
                   }
               }
             else if (NOT msgTruncated)
               {
                 missingHistograms += "(...),";
                 msgTruncated = TRUE;
               }
           }
         else
           {
             if (group->oldHistid == 0) {
               // Only assign the following fields if not already assigned.
               // These fields are assigned with NECESSARY, ... syntax.
               group->oldHistid = tableGroup->oldHistid;
               group->reason = tableGroup->reason;
             }
             // Flip LSB to generate new histid.  Must use oldHistid from
             // tableGroup since this is what was just read from the HISTOGRAMS
             // table with groupListFromTable() function.  Preserve READ_TIME.
             group->newHistid = DualHistid(tableGroup->oldHistid);
             strncpy(group->readTime, tableGroup->readTime, TIMESTAMP_CHAR_LEN);
             group->readTime[TIMESTAMP_CHAR_LEN] = '\0';
           }
         tableGroup = tableGroup->next;
       }
     LM->StopTimer();  // Change hist ids of existing histograms

     // Create 9202 warning if some existing histograms are not getting regenerated
     // but not if the NECESSARY keyword is being used.
     if (missingHistograms.length() > 0 && !(optFlags & NECESSARY_OPT))
       {
         missingHistograms.remove(missingHistograms.length() - 1);       // remove last comma
         HSFuncMergeDiags(UERR_DOWN_LEVEL_HISTOGRAMS, missingHistograms.data());

         if (LM->LogNeeded())
           {
             LM->Log("WARNING: missing GroupLists ");
             LM->Log(missingHistograms.data());
           }
       }

     //PASS 2: Loop through histograms that are being updated.  Histograms that
     //        did not previously exist in the HISTOGRAMS table should have
     //        oldHistid = 0 and newHistid = 0.
     if (!(optFlags & CLEAR_OPT))
       {
         LM->StartTimer("Check histograms that are being updated");
         if (singleGroup)
           {
             group = singleGroup;                  /* process single-columns  */
             processMultiGroups = TRUE;            /* then multi-columns      */
           }
         else
           {
             group = multiGroup;                   /* process multi-columns   */
             processMultiGroups = FALSE;           /* only once               */
           }

         while (group != NULL)
           {
             if ((group->oldHistid == 0) &&
                 (group->newHistid == 0))
               {
                 if (prevHistId == 0)
                   {
                     prevHistId = (ULng32)
                       (NA_JulianTimestamp() &
                        ColStats::USTAT_HISTOGRAM_ID_THRESHOLD);
                     group->newHistid = prevHistId;
                   }
                 else
                   {
                     prevHistId = prevHistId + 5;
                     group->newHistid = prevHistId;
                   }
               }

             group = group->next;
             if (group == NULL && processMultiGroups)
               {
                 processMultiGroups = FALSE;
                 group = multiGroup;
               }
           }
         LM->StopTimer();   // Check histograms that are being updated
       }

     if (LM->LogNeeded())
       {
         LM->Log("\t\tREQUESTED GROUPLIST(s)");
         if (singleGroup != NULL)
           singleGroup->print();
         if (multiGroup != NULL)
           multiGroup->print();
       }

     if (tableGroupList != NULL)
       {
         delete tableGroupList;
         tableGroupList = NULL;
       }

     return retcode;
   }

 /**************************************************************************/
 /* METHOD:  getAdjustedIntervalCount()                                    */
 /* PURPOSE: Make adjustments to the number of intervals that will be used */
 /*          based on column type, single- or multi-column, and number of  */
 /*          gap and high frequency intervals desired.                     */
 /* PARAMS:  group(in) -- Group to adjust the interval count for.          */
 /*          intCount(in) -- Current number of intervals.                  */
 /*          rowCount(in) -- Number of rows in the table.                  */
 /*          rowsetSize(in) -- Number of rows in the current rowset.       */
 /*          singleIntervalPerUec(out) -- TRUE if this function determines */
 /*                                       we need a separate interval for  */
 /*                                       each distinct value.             */
 /*          gapIntCount(out) -- Number of intervals to be used for gaps.  */
 /*          highFreqIntCount(out) -- Number of intervals for high         */
 /*                                   frequency values.                    */
 /* RETCODE: The number of intervals to use, after adjustment.             */
 /**************************************************************************/
 Lng32 HSGlobalsClass::getAdjustedIntervalCount(
                               HSColGroupStruct *group,
                               Lng32 intCount,
                               Int64 rowCount,
                               Lng32 rowsetSize,
                               NABoolean &singleIntervalPerUec,
                               Lng32 &gapIntCount,      // get #intvls for gaps
                               Lng32 &highFreqIntCount) // get #intvls for high freq
 {
   HSLogMan *LM = HSLogMan::Instance();
   Lng32 adjIntCount = intCount;

   //For some reason character data requires more intervals. This code has
   //been here before my time. My best guess is that when we encode
   //char->double, using CharType::encodeString(), the numeric result may be
   //inaccurate. Therefore, if we use more intervals, the error becomes less
   //visible. Regardless, if the user requests GENERATE x INTERVALS, we should
   //not make any adjustments.
   if ( NOT (optFlags & INTERVAL_OPT) &&
        DFS2REC::isAnyCharacter(group->colSet[0].datatype))
     adjIntCount = MAXOF(intCount, 62);

   //#intervals should not be greater than the number of rows in table
   if (rowCount < (Int64) INT_MAX)
     adjIntCount = MINOF(adjIntCount, (Lng32)(rowCount));

   //If all the rows in the table have been exhausted AND
   //   UEC <= LOW_UEC_THRESHOLD or UEC <= #intervals
   // adjust the #intervals so that only 1 uec per interval.
   if ( ((rowsetSize <= CmpCommon::getDefaultLong(HIST_LOW_UEC_THRESHOLD))
                                     &&
          NOT (optFlags & INTERVAL_OPT)
        )
                                     ||
        ((optFlags & INTERVAL_OPT) && (rowsetSize <= intCount))
      )
     {
       adjIntCount = MAXOF(rowsetSize, 1);
       singleIntervalPerUec = TRUE;
     }

   // Set multi-column histograms to 1 interval.  The optimizer can only use
   if (group->colCount > 1 && !group->skewedValuesCollected)
     adjIntCount = 1;

   // If gap processing is to be done (numeric single-column groups and not single
   // interval for each distinct value), set the number of intervals we want to
   // use for gaps. Add that same number of extra intervals so we can be more
   // inclusive in creating gap intervals before we have seen all the gaps and
   // know their actual distribution. We will merge the lesser ones with other
   // intervals if we select too many.
   if (CmpCommon::getDefault(USTAT_PROCESS_GAPS) == DF_OFF)  // disabled by CQD
     {
       gapIntCount = 0;  // forget about gaps
       if (LM->LogNeeded())
         LM->Log("Gap processing is disabled.");
     }
   else if (!singleIntervalPerUec && (adjIntCount > 1) && (group->colCount <= 1)
                                  && DFS2REC::isNumeric(group->colSet[0].datatype))
     {
       gapIntCount = (Lng32)(adjIntCount
                        * (CmpCommon::getDefaultNumeric(USTAT_GAP_PERCENT) / 100));
       adjIntCount += gapIntCount;
     }
   else
     {
       gapIntCount = 0;  // forget about gaps
       if (LM->LogNeeded() && group->colCount == 1)
         {
           sprintf(LM->msg, "No gap processing for column %s: ",
                            group->colSet[0].colname->data());
           if (singleIntervalPerUec)
             sprintf(LM->msg + strlen(LM->msg), "using single interval per uec");
           else if (!DFS2REC::isNumeric(group->colSet[0].datatype))
             sprintf(LM->msg + strlen(LM->msg), "not a numeric column");
           else
             sprintf(LM->msg + strlen(LM->msg), "** reason unknown **");  // shouldn't happen
           LM->Log(LM->msg);
         }
     }

   // Add some intervals for high frequency values. These won't be used in the
   // calculation of the step size. highFreqIntCount is returned and passed to
   // HSHistogram ctor.
   if (!singleIntervalPerUec && adjIntCount > 1)
     {
       float freqSizePercent = (float) CmpCommon::getDefaultNumeric(USTAT_FREQ_SIZE_PERCENT);
       // avoid a divide by zero.  Also, a percentage should not be negative
       if(freqSizePercent<=0)
         highFreqIntCount = 1000;
       else
         highFreqIntCount = MINOF(1000,(Lng32)(100.0 / freqSizePercent));
       adjIntCount = MINOF(adjIntCount + highFreqIntCount, HS_MAX_INTERVAL_COUNT);
       if (LM->LogNeeded())
         {
           sprintf(LM->msg, "Allotting %d intervals for high frequency values",
                   highFreqIntCount);
           LM->Log(LM->msg);
         }
     }
   else
     highFreqIntCount = 0;  // don't need them

   return adjIntCount;
 }

 // Return the name of the integral type to map an interval type to, based
 // on the largest integer value possible when a value of the interval type
 // is cast to its rightmost (most precise) field. The returned name is used
 // in the CAST clause in the query to retrieve the interval value. For example,
 // INTERVAL HOUR(2) TO MINUTE could represent a duration up to 5999 minutes,
 // and can be cast to a smallint.
 //
 NAString* getIntTypeForInterval(HSColGroupStruct *group, Int64 maxIntValue)
 {
   NAString* typeName;
   group->ISprecision = 0;
   group->ISscale = 0;
   if (maxIntValue <= CHAR_MAX)
     {
       group->ISdatatype = REC_BIN8_SIGNED;
       group->ISlength = 1;
       typeName = &LiteralTinyInt;  // from NumericTypes.h
     }
   else if (maxIntValue <= SHRT_MAX)
     {
       group->ISdatatype = REC_BIN16_SIGNED;
       group->ISlength = 2;
       typeName = &LiteralSmallInt;  // from NumericTypes.h
     }
   else if (maxIntValue <= INT_MAX)
     {
       group->ISdatatype = REC_BIN32_SIGNED;
       group->ISlength = 4;
       typeName = &LiteralInteger;   // from NumericTypes.h
     }
   else
     {
       group->ISdatatype = REC_BIN64_SIGNED;
       group->ISlength = 8;
       typeName = &LiteralLargeInt;  // from NumericTypes.h
     }

   return typeName;
 }

 // Return the type name that an interval containing a seconds field with
 // fractional seconds precision will be cast to. Also set up the other
 // type fields for the result of the cast. For example, INTERVAL MINUTE(2)
 // TO SECOND(6) would be cast as NUMERIC(10,6) to hold the number of seconds
 // in a value of the interval.
 //
 NAString* getNumericTypeForInterval(HSColGroupStruct *group,
                                     Lng32 secondsPrecision,
                                     Lng32 fractionalPrecision,
                                     char* precCommaScale)
 {
   NAString *typeName = new (STMTHEAP) NAString("numeric(", STMTHEAP);
   typeName->append(precCommaScale).append(")");
   group->ISprecision = secondsPrecision + fractionalPrecision;
   group->ISscale = fractionalPrecision;
   if (group->ISprecision < 5)
     {
       group->ISdatatype = REC_BIN16_SIGNED;
       group->ISlength = 2;
     }
   else if (group->ISprecision < 10)
     {
       group->ISdatatype = REC_BIN32_SIGNED;
       group->ISlength = 4;
     }
   else
     {
       group->ISdatatype = REC_BIN64_SIGNED;
       group->ISlength = 8;
     }

   return typeName;
 }

 // Form the select list expression for each column handled through internal sort.
 // For columns of an SQL type having a corresponding C type, the expression is
 // simply the column name.  For other types, the expression may perform an
 // order-preserving transformation into an efficiently sorted C type.
 //
 // This function also sets the type information for the sorted column.  If the
 // column is not transformed before sorting, these type fields (ISdatatype,
 // ISlength, ISprecision, ISscale) are the same as the column's original type
 // information.
 //
 // The forHive parameter indicates whether this is called when using a Hive
 // sample table that was created by the bulk loader. If so, we generate column
 // names as "col" with an integer appended that is the ordinal position of the
 // column in the table. This avoids all problems we would otherwise have with
 // delimited ids due to the restricted set of characters currently allowed in
 // Hive names and lack of case-sensitivity.
 //
 static void mapInternalSortTypes(HSColGroupStruct *groupList, NABoolean forHive = FALSE)
 {
   HSColGroupStruct *group = groupList;
   NAString* typeName;
   HSLogMan *LM = HSLogMan::Instance();
   char sbuf[40];
   while (group)
     {
      HSColumnStruct &col = group->colSet[0];
      NAString columnName, dblQuote="\"";
      group->ISSelectExpn = "";

      // If retrieving from the Hive backing sample for a table, positional names
      // are used. This avoids any issues with Trafodion delimited ids that do
      // not map to valid Hive column names.
      if (forHive)
        {
          columnName = "col";
          sprintf(sbuf, "%d", col.colnum+1);
          columnName.append(sbuf);
        }
      // Surround column name with double quotes, if not already delimited.
      else if (group->colNames->data()[0] == '"')
        columnName=group->colNames->data();
      else
        columnName=dblQuote+group->colNames->data()+dblQuote;

      *(col.externalColumnName) = columnName;

      switch (col.datatype)
      {
       case REC_DECIMAL_LSE:
       case REC_DECIMAL_UNSIGNED:
       case REC_DECIMAL_LS:
         if (col.precision < 5)
           {
             if (col.datatype == REC_DECIMAL_UNSIGNED)
               group->ISdatatype = REC_BIN16_UNSIGNED;
             else
               group->ISdatatype = REC_BIN16_SIGNED;
             group->ISlength = 2;
             typeName = &LiteralSmallInt;  // from NumericTypes.h
           }
         else if (col.precision < 10)
           {
             if (col.datatype == REC_DECIMAL_UNSIGNED)
               group->ISdatatype = REC_BIN32_UNSIGNED;
             else
               group->ISdatatype = REC_BIN32_SIGNED;
             group->ISlength = 4;
             typeName = &LiteralInteger;   // from NumericTypes.h
           }
         else
           {
             // Max precision is 18. Largeint can't be unsigned.
             group->ISdatatype = REC_BIN64_SIGNED;
             group->ISlength = 8;
             typeName = &LiteralLargeInt;  // from NumericTypes.h
           }
         group->ISprecision = 0;
         group->ISscale = 0;
         formatFixedNumeric((Int64)pow(10, col.scale), 0, sbuf);
         group->ISSelectExpn.append("cast(")
             .append(columnName)
             .append("*")
             .append(sbuf)
             .append(" as ")
             .append(*typeName);
         if (col.datatype == REC_DECIMAL_UNSIGNED)
           group->ISSelectExpn.append(" unsigned");
         group->ISSelectExpn.append(")");
         break;

       case REC_DATETIME:
         switch (col.precision)
           {
             case REC_DTCODE_DATE:
               group->ISdatatype = REC_BIN32_SIGNED;
               group->ISlength = 4;
               group->ISprecision = 0;
               group->ISscale = 0;
               group->ISSelectExpn.append("datediff(day, date'0001-01-01', ")
                   .append(columnName)
                   .append(")");
               break;

             // time(0) is treated as an integer, while time(n) is treated as
             // numeric(5+n,n).
             case REC_DTCODE_TIME:
               if (col.scale > 0)  // Max scale (fractional seconds) is 6
                 {
                   group->ISprecision = 5 + col.scale; // 5 digits for #seconds/day
                   group->ISscale = col.scale;
                   if (col.scale <= 4)
                     {
                       group->ISdatatype = REC_BIN32_SIGNED;
                       group->ISlength = 4;
                     }
                   else
                     {
                       group->ISdatatype = REC_BIN64_SIGNED;
                       group->ISlength = 8;
                     }
                 }
               else
                 {
                   group->ISdatatype = REC_BIN32_SIGNED;
                   group->ISlength = 4;
                   group->ISprecision = 0;
                   group->ISscale = 0;
                 }
               // CLI will treat this expression as a largeint regardless of the
               // fractional seconds precision. Since we are assuming it will be
               // a 4-byte int if scale < 5, we have to add an extra cast so the
               // value is returned in the format we are expecting.
               if (col.scale > 0 && col.scale <= 4)
                 group->ISSelectExpn.append("cast(");
               group->ISSelectExpn.append("hour(").append(columnName).append(")*3600+minute(")
                   .append(columnName).append(")*60+second(")
                   .append(columnName).append(")");
               if (col.scale > 0 && col.scale <= 4)
                 {
                   sprintf(sbuf, " as numeric(%d,%d))", group->ISprecision, group->ISscale);
                   group->ISSelectExpn.append(sbuf);
                 }
               break;

             case REC_DTCODE_TIMESTAMP:
               group->ISdatatype = REC_BIN64_SIGNED;
               group->ISlength = 8;
               group->ISprecision = 0;
               group->ISscale = 0;
               group->ISSelectExpn.append("juliantimestamp(")
                   .append(columnName)
                   .append(")");
               break;

             default:
               LM->Log("INTERNAL ERROR (mapInternalSortTypes):");
               sprintf(LM->msg, "Undefined datetime type %d", col.precision);
               LM->Log(LM->msg);
               *CmpCommon::diags() << DgSqlCode(-UERR_GENERIC_ERROR)
                                   << DgString0("mapInternalSortTypes()")
                                   << DgString1("N/A")
                                   << DgString2(LM->msg);
               throw CmpInternalException("failure in mapInternalSortTypes()",
                                          __FILE__, __LINE__);
           }
         break;

       // col.precision is the leading field (decimal) precision for an
       // interval type. Using this as an exponent of 10 gives an upper bound
       // for the interval value cast as an integer.
       case REC_INT_YEAR:
       case REC_INT_MONTH:
       case REC_INT_DAY:
       case REC_INT_HOUR:
       case REC_INT_MINUTE:
         typeName = getIntTypeForInterval(group, (Int64)pow(10, col.precision));
         group->ISSelectExpn.append("cast(")
             .append(columnName)  // interval column
             .append(" as ")
             .append(*typeName)   // smallint, int, etc., depending on interval precision
             .append(")");
         break;

       case REC_INT_SECOND:
         if (col.scale > 0)
           {
             // The casts to seconds and to numeric will both have (prec,scale)
             // components, but prec includes scale for numeric, while for seconds
             // it does not.
             sprintf(sbuf, "%d,%d", col.precision+col.scale, col.scale);
             typeName = getNumericTypeForInterval(group, col.precision, col.scale, sbuf);
             sprintf(sbuf, "%d,%d", col.precision, col.scale); // for seconds cast below
             }
         else
           typeName = getIntTypeForInterval(group, (Int64)pow(10, col.precision));
         group->ISSelectExpn.append("cast(")
             .append(columnName)
             .append(" as ")
             .append(*typeName)
             .append(")");
         break;

       case REC_INT_YEAR_MONTH:
         sprintf(sbuf, "%d", col.precision+2); // required precision for single-field interval
         typeName = getIntTypeForInterval(group, 12 * (Int64)pow(10, col.precision));
         group->ISSelectExpn.append("cast(cast(")
             .append(columnName)
             .append(" as interval month(")
             .append(sbuf)
             .append(")) as ")
             .append(*typeName)
             .append(")");
         break;

       case REC_INT_DAY_HOUR:
         sprintf(sbuf, "%d", col.precision+2);
         typeName = getIntTypeForInterval(group, 24 * (Int64)pow(10, col.precision));
         group->ISSelectExpn.append("cast(cast(")
             .append(columnName)
             .append(" as interval hour(")
             .append(sbuf)
             .append(")) as ")
             .append(*typeName)
             .append(")");
         break;

       case REC_INT_HOUR_MINUTE:
         sprintf(sbuf, "%d", col.precision+2);
         typeName = getIntTypeForInterval(group, 60 * (Int64)pow(10, col.precision));
         group->ISSelectExpn.append("cast(cast(")
             .append(columnName)
             .append(" as interval minute(")
             .append(sbuf)
             .append(")) as ")
             .append(*typeName)
             .append(")");
         break;

       case REC_INT_DAY_MINUTE:
         sprintf(sbuf, "%d", col.precision+4); // required precision for single-field interval
         typeName = getIntTypeForInterval(group, 24 * 60 * (Int64)pow(10, col.precision));
         group->ISSelectExpn.append("cast(cast(")
             .append(columnName)
             .append(" as interval minute(")
             .append(sbuf)
             .append(")) as ")
             .append(*typeName)
             .append(")");
         break;

       case REC_INT_MINUTE_SECOND:
         if (col.scale > 0)
           {
             // The casts to seconds and to numeric will both have (prec,scale)
             // components, but prec includes scale for numeric, while for seconds
             // it does not.
             sprintf(sbuf, "%d,%d", col.precision+2+col.scale, col.scale);
             typeName = getNumericTypeForInterval(group, col.precision+2, col.scale, sbuf);
             sprintf(sbuf, "%d,%d", col.precision+2, col.scale); // for seconds cast below
           }
         else
           {
             sprintf(sbuf, "%d,0", col.precision+2); // for seconds cast below
             typeName = getIntTypeForInterval(group, 60 * (Int64)pow(10, col.precision));
           }
         group->ISSelectExpn.append("cast(cast(")
             .append(columnName)
             .append(" as interval second(")
             .append(sbuf)
             .append(")) as ")
             .append(*typeName)
             .append(")");
         break;

       case REC_INT_HOUR_SECOND:
         if (col.scale > 0)
           {
             // The casts to seconds and to numeric will both have (prec,scale)
             // components, but prec includes scale for numeric, while for seconds
             // it does not.
             sprintf(sbuf, "%d,%d", col.precision+4+col.scale, col.scale);
             typeName = getNumericTypeForInterval(group, col.precision+4, col.scale, sbuf);
             sprintf(sbuf, "%d,%d", col.precision+4, col.scale); // for seconds cast below
           }
         else
           {
             sprintf(sbuf, "%d,0", col.precision+4); // for seconds cast below
             typeName = getIntTypeForInterval(group, 60 * 60 * (Int64)pow(10, col.precision));
           }
         group->ISSelectExpn.append("cast(cast(")
             .append(columnName)
             .append(" as interval second(")
             .append(sbuf)
             .append(")) as ")
             .append(*typeName)
             .append(")");
         break;

       case REC_INT_DAY_SECOND:
         if (col.scale > 0)
           {
             // The casts to seconds and to numeric will both have (prec,scale)
             // components, but prec includes scale for numeric, while for seconds
             // it does not.
             sprintf(sbuf, "%d,%d", col.precision+5+col.scale, col.scale);
             typeName = getNumericTypeForInterval(group, col.precision+5, col.scale, sbuf);
             sprintf(sbuf, "%d,%d", col.precision+5, col.scale); // for seconds cast below
           }
         else
           {
             sprintf(sbuf, "%d,0", col.precision+5); // for seconds cast below
             typeName = getIntTypeForInterval(group, 24 * 60 * 60 * (Int64)pow(10, col.precision));
           }
         group->ISSelectExpn.append("cast(cast(")
             .append(columnName)
             .append(" as interval second(")
             .append(sbuf)
             .append(")) as ")
             .append(*typeName)
             .append(")");
         break;

       // Either this type not handled by IS yet, in which case the assigned
       // values won't matter, or no encoding is needed, in which case the
       // fields for the sorted type are the same as those for the original type.
       default:
         {
           HSGlobalsClass *hs_globals = GetHSContext();
           group->ISdatatype = col.datatype;
           //group->ISlength = col.length;
           group->setISlength(col.length,hs_globals->maxCharColumnLengthInBytes);
           group->ISprecision = col.precision;
           group->ISscale = col.scale;
           // the method below handles adding SUBSTRING for over-size char/varchars
           col.addTruncatedColumnReference(group->ISSelectExpn);
         }
         break;
      } // switch
      group = group->next;
     }  // while
 }

 // For each multi-column for which a histograms are to be created, determine
 // the extra memory needed for this multi-column to be computed in memory
 void HSGlobalsClass::getMCMemoryRequirements(HSColGroupStruct* mgroup, Int64 rows)
 {
   HSLogMan *LM = HSLogMan::Instance();

   if (LM->LogNeeded())
   {
      sprintf(LM->msg, "MC: Memory estimates for multi-group based on " PF64 " rows", rows);
      LM->Log(LM->msg);
   }

   while (mgroup)
   {
       getMemoryRequirementsForOneMCGroup(mgroup, rows);
       mgroup = mgroup->next;
   }
 }

 //
 // Get the number of bytes required for one multi-column group.
 //
 void HSGlobalsClass::getMemoryRequirementsForOneMCGroup(HSColGroupStruct* mgroup, Int64 rows)
 {
   HSLogMan *LM = HSLogMan::Instance();

   Int64 memNeededForAllCols = 0;
   Int64 memAllowed = getMaxMemory();

   HSColGroupStruct *sgroup;
   HSColumnStruct* col;
   for (Int32 i=0; i< mgroup->colCount; i++)
   {
      col = &mgroup->colSet[i];
      sgroup = findGroup(col->colnum);
      memNeededForAllCols += sgroup->memNeeded;
      // to simplify coding take the size of the largest iterator
      mgroup->memNeeded += sizeof(MCFixedCharIterator);
      if (sgroup->colSet[0].nullflag)
      {
         mgroup->memNeeded += sizeof(NABitVector);
         mgroup->memNeeded += ceil(rows/8);
      }
   }

   mgroup->memNeeded += sizeof(MCWrapper)*rows;
   mgroup->mcis_totalMCmemNeeded = mgroup->memNeeded+memNeededForAllCols;

   if (LM->LogNeeded())
   {
     sprintf(LM->msg, "MC: Group with columns %s requires (%ld) bytes of memory"
                      " for internal sort including (%ld) bytes for MC processing.",
                       mgroup->colNames->data(), mgroup->mcis_totalMCmemNeeded, mgroup->memNeeded);
     LM->Log(LM->msg);
   }
 }

 // For each column for which histograms are to be created, determine the
 // number of bytes required to store all the values that will be used (all
 // column values or the size of the sample to be collected).
 //
 void HSGlobalsClass::getMemoryRequirements(HSColGroupStruct* group, Int64 rows)
 {
   HSLogMan *LM = HSLogMan::Instance();

   if (LM->LogNeeded())
     {
       sprintf(LM->msg, "Memory estimates for single group based on " PF64 " rows", rows);
       LM->Log(LM->msg);
     }

   while (group)
     {
       getMemoryRequirementsForOneGroup(group, rows);
       group = group->next;
     }
 }

 //
 // Get the number of bytes required for one single group.
 //
 void HSGlobalsClass::getMemoryRequirementsForOneGroup(HSColGroupStruct* group, Int64 rows)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Int32 elementSize=0;

       switch (group->ISdatatype)
         {
           case REC_BOOLEAN:
           case REC_BIN8_SIGNED:
           case REC_BIN8_UNSIGNED:
             elementSize = 1;
             break;

           case REC_BIN16_SIGNED:
           case REC_BIN16_UNSIGNED:
             elementSize = 2;
             break;

           case REC_BIN32_SIGNED:
           case REC_BIN32_UNSIGNED:
           case REC_IEEE_FLOAT32:
             elementSize = 4;
             break;

           case REC_BIN64_SIGNED:
           case REC_BIN64_UNSIGNED:
           case REC_IEEE_FLOAT64:
             elementSize = 8;
             break;

           case REC_BYTE_F_ASCII:
           case REC_BYTE_F_DOUBLE:
             // Length is in bytes, not chars. Add size for object that references
             // the string, which is stored in a separate array.
             elementSize = group->ISlength + sizeof(ISFixedChar);
             break;

           case REC_BYTE_V_ASCII:
           case REC_BYTE_V_DOUBLE:
             elementSize = group->varcharContentSize() + sizeof(ISVarChar);
             break;

           default:
             // Check to see if the column's type is supposed to be handled
             // by internal sort.  If so, this is a problem.
             if (isInternalSortType(group->colSet[0]))
               {
                 LM->Log("INTERNAL ERROR (getInternalSortMemoryRequirements):");
                 sprintf(LM->msg, "Undefined datatype %d", group->ISdatatype);
                 LM->Log(LM->msg);
                 *CmpCommon::diags() << DgSqlCode(-UERR_GENERIC_ERROR)
                                     << DgString0("getInternalSortMemoryRequirements()")
                                     << DgString1("N/A")
                                     << DgString2(LM->msg);
                 throw CmpInternalException("failure in getInternalSortMemoryRequirements()",
                                            __FILE__, __LINE__);
               }
             elementSize = 0;
             break;
         }

       Int64 i64MemNeeded = rows * elementSize;
       if (group->isCompacted())  // varchar only
         {
           i64MemNeeded += (MAX_ROWSET * group->inflatedVarcharContentSize());
         }
       group->memNeeded = (i64MemNeeded <= UINT_MAX ? (size_t)i64MemNeeded : 0);
       if (LM->LogNeeded())
         {
           if (group->memNeeded == 0)
             sprintf(LM->msg, "Column %s requires too much memory for internal sort",
                              group->colSet[0].colname->data());
           else
             sprintf(LM->msg, "Column %s requires " PFSZ " bytes of memory for internal sort.",
                              group->colSet[0].colname->data(), group->memNeeded);
           LM->Log(LM->msg);
         }
 }

 // The number of rows the allocation is based on is returned as the function
 // result. This is necessary because sampleRowCount may change (become larger)
 // subsequent to this -- if we are unable to read the entire sample directly
 // into memory, a sample table is created and populated, and sampleRowCount
 // is changed from an estimate to the actual number of rows read into the
 // sample table. The maximum number of rows we will actually read must be based
 // on the amount of memory allocated to hold their values.
 //
 Int64 HSGlobalsClass::getInternalSortMemoryRequirements(NABoolean performISForMC)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Int64 rows;
   if (sampleRowCount > 0)
     rows = sampleRowCount;
   else
     rows = actualRowCount;

   // get memory requirements for single column groups first
   getMemoryRequirements(singleGroup, rows);

   if ( performISForMC )
   {
      // now get memory requirements for multi-column groups
      getMCMemoryRequirements(multiGroup, rows);
   }

   return rows;
 }

 // Parse a simple query using the Where clause specified for an IUS statement
 // with the parser flag PARSING_IUS_WHERE_CLAUSE set. This will return an
 // error for any language constructs used within the Where clause that are not
 // appropriate in this context. Any ordinary syntax error is ignored; we let
 // these be reported later so they will be shown in the context of an actual
 // statement generated for IUS.
 Lng32 HSGlobalsClass::validateIUSWhereClause()
 {
   Lng32 retcode = 0;

   // use QualifiedName constructor to correctly handle delimited names.
   QualifiedName qualTableName(user_table->data(), 1);
   NAString tableNameStr = qualTableName.getUnqualifiedObjectNameAsAnsiString();
   NAString query = "select count(*) from ";
   query.append(tableNameStr);
   query.append(" where ").append(getWherePredicateForIUS());

   // set PARSING_IUS_WHERE_CLAUSE bit in Sql_ParserFlags; return it to
   // its entry value on exit
   PushAndSetSqlParserFlags savedParserFlags(PARSING_IUS_WHERE_CLAUSE);

   Parser parser(CmpCommon::context());
   Lng32 diagsMark = diagsArea.mark();

   // We have to make the table name used in the From clause unqualified, or it
   // will be flagged as a violation of the IUS Where clause restriction against
   // qualification. To make sure the correct table is used, we temporarily
   // replace the default catalog and schema with the ones for the table we
   // are using.
   SchemaName& sch = const_cast<SchemaName&>(ActiveSchemaDB()->getDefaultSchema());
   NAString oldCatName = sch.getCatalogNameAsAnsiString();
   NAString oldSchName = sch.getUnqualifiedSchemaNameAsAnsiString();
   const char* period = strchr(catSch->data(), '.');
   NAString tempCatName(catSch->data(), period - catSch->data());
   NAString tempSchName(period+1);
   sch.setCatalogName(tempCatName);
   sch.setSchemaName(tempSchName);
   ExprNode* tree = parser.getExprTree(query.data(),
                                       query.length(),
                                       CharInfo::ISO88591);
   sch.setCatalogName(oldCatName);
   sch.setSchemaName(oldSchName);

   if (!tree)
     {
       // If an ordinary syntax error occurs, don't report it here, let it
       //surface through parsing of an actual statement generated by IUS.
       if (diagsArea.contains(-UERR_SYNTAX_ERROR))
         diagsArea.rewind(diagsMark, TRUE);
       else
         retcode = diagsArea.mainSQLCODE();
     }

   return retcode;
 }

 /***********************************************/
 /* METHOD:  CollectStatistics()                */
 /* PURPOSE: Generate histograms based on data  */
 /*          read from table. If sampling is    */
 /*          requested, the data is read from   */
 /*          sample table. Otherwise, data is   */
 /*          read from the base table.          */
 /* NOTES:   Single-column histograms are       */
 /*          directly generated from data read. */
 /*          Multi-Columns are later computed.  */
 /* RETCODE:  0 - successful                    */
 /*          -1 - failure                       */
 /***********************************************/
 Lng32 HSGlobalsClass::CollectStatistics()
   {
     Lng32 retcode = 0;
     HSColGroupStruct *group = singleGroup;
     HSCursor *cursor;
     NAString textForColumnCast;
     NAString columnName, dblQuote="\"";
     const Lng32 maxCharBoundaryLen =
       (Lng32) CmpCommon::getDefaultNumeric(USTAT_MAX_CHAR_BOUNDARY_LEN);
     HSLogMan *LM = HSLogMan::Instance();
     NABoolean useSampling = sampleOptionUsed == TRUE ||
                             CmpCommon::getDefault(USTAT_FORCE_TEMP) == DF_ON;
     HSSample sampleTable(objDef, optFlags & SAMPLE_REQUESTED, sampleTblPercent);
       // Initialize variables for sample table.  May not be used.

     // set CQD for Hive if needed
     setHiveMaxStringLengthInBytes();

     /*======================================================================*/
     /* Perform internal sort if enabled.                                    */
     /*======================================================================*/
     // If internal sort is enabled (it is by default), each iteration of the
     // loop below reads in as many columns from the table as will fit in the
     // amount of memory we determine we can afford to use, and creates
     // histograms using internal sort on those columns.
     // selectSortBatch() chooses the columns to process in a query and marks them
     // PENDING.  These columns are marked PROCESSED in createStats() so they
     // will not be further considered. If any columns are unable to be processed
     // by internal sort due to lack of memory (resulting from fluctuating memory
     // availability or a miscalculation of memory available), they will be
     // processed in the "old" way.
     Int32 numColsSelected = 0;
     Int32 numColsToProcess = 0;
     NABoolean internalSortWhenBetter = FALSE;

     // Check to see if sample table specified via CQD (for debugging and testing).
     // CQD must specify the fully qualified name and must use 'SAMPLE xxx ROWS'
     // where xxx is the size of the specified sample table.
     NAString sampleTableFromCQD;
     CmpCommon::getDefault(USTAT_SAMPLE_TABLE_NAME, sampleTableFromCQD, FALSE);
     NABoolean useBackingSample = (CmpCommon::getDefault(USTAT_USE_BACKING_SAMPLE) == DF_ON);
     if (useBackingSample)
       {
         externalSampleTable = TRUE;
         sampleTableUsed     = TRUE;
         samplingUsed        = TRUE;
         *hssample_table = "HIVE.HIVE.";
         NAString hiveSampleTableName = user_table->data();
         TrafToHiveSampleTableName(hiveSampleTableName);
         hssample_table->append(hiveSampleTableName);
         snprintf(LM->msg, sizeof(LM->msg), "Using external sample table %s.",
                  hssample_table->data());
         LM->Log(LM->msg);
       }
     else if (! IsNAStringSpaceOrEmpty(sampleTableFromCQD))
       {
         *hssample_table = sampleTableFromCQD;
         externalSampleTable = TRUE;
         sampleTableUsed     = TRUE;
         samplingUsed        = TRUE;
       }
     else if (optFlags & IUS_PERSIST)  // PERSIST keyword given with sample clause
       {
         // Create a persistent sample table. It will be used for this non-IUS
         // execution of Update Stats, and updated incrementally for subsequent
         // IUS operations.
         HSPersSamples *sampleList = HSPersSamples::Instance(objDef->getCatName(),
                                                             objDef->getSchemaName());
         if (!sampleList)
           return -1;  // sample list didn't exist and failed creation
         retcode = sampleList->createAndInsert(objDef,
                                     *hssample_table,
                                     sampleRowCount, actualRowCount,
                                     TRUE, /* isEstimate */
                                     'I',  /* incremental update stats */
                                     TRUE, /* create addtional D and I
                                             tables for IUS, used by
                                             algorithm 2.  */
                                     minRowCtPerPartition_
                                               );
         if (retcode == 0)
           {
             externalSampleTable = TRUE;
             sampleTableUsed     = TRUE;
             samplingUsed        = TRUE;
           }
         else
           return retcode;
       }
     else
       externalSampleTable = FALSE;

     if (useBackingSample)
       {
         return CollectStatisticsWithFastStats();
       }
     else if (canDoIUS())
       {
         // Use IUS and use output param 'done' to indicate if we need to carry
         // on with RUS code below, or if IUS did it all and we can return.
         NABoolean done = FALSE;
         retcode = doIUS(done);
         HSHandleError(retcode);
         if (done)
           return retcode;

         // Set sampling parameters to do an RUS corresponding to the existing
         // persistent sample.
         useSampling = TRUE;
         externalSampleTable = TRUE;
         sampleTblPercent = sampleRateAsPercetageForIUS * 100;  // used for scaling results
       }

     NAString internalSortCQDValue = ActiveSchemaDB()->getDefaults().getValue(USTAT_INTERNAL_SORT);

     if (internalSortCQDValue == "OFF" )
       {
         // internal sort disabled
         if (LM->LogNeeded()) LM->Log("Internal sort is disabled");
         if (useSampling && !externalSampleTable)
           retcode = sampleTable.make(currentRowCountIsEstimate_,
                                      *hssample_table,
                                      actualRowCount, sampleRowCount);
             // hssample_table assigned, actualRowCount and sampleRowCount may get adjusted.
         else if (!externalSampleTable)
           {
             *hssample_table = getTableName(user_table->data(), nameSpace);
             sampleRowCount = actualRowCount;
           }
         HSHandleError(retcode);
       }
     else  // internal sort is enabled
       {
         // Figure out which groups are eligible for varchar compaction:
         // A varchar column is eligible if CQD USTAT_COMPACT_VARCHARS is 'ON'
         // and that column is not referenced by any multi-column group.
         // The reason for the latter condition is we want to avoid the
         // possibility of doing a second full table sample scan in the event
         // that we attempt to do multi-column histograms in-memory, and
         // we underestimate the memory needed for internal sort.

         NABoolean varcharCompactionFeasible = FALSE;
         if (CmpCommon::getDefault(USTAT_COMPACT_VARCHARS) == DF_ON)
           {
             NABitVector * refdColumns = new (STMTHEAP) NABitVector (STMTHEAP);
             for (HSColGroupStruct * mcgrp = multiGroup; mcgrp; mcgrp = mcgrp->next)
               {
                 for (Int32 i = 0; i < mcgrp->colCount; i++)
                   {
                     HSColumnStruct *c = &mcgrp->colSet[i];
                     refdColumns->setBit(c->colnum);
                   }
               }

             for (HSColGroupStruct * sgrp = singleGroup; sgrp; sgrp = sgrp->next)
               {
                 if (!refdColumns->contains(sgrp->colSet[0].colnum))
                   {
                     sgrp->eligibleForVarCharCompaction = TRUE;
                     varcharCompactionFeasible = TRUE;
                   }
               }

             delete refdColumns;
           }

         // Get percentage of available memory to recommend. If an allocation
         // for memory for a column fails, this percentage will be reduced
         // for subsequent selection of column batches.
         //
         ISMemPercentage_ = (float)CmpCommon::getDefaultNumeric(USTAT_IS_MEMORY_FRACTION);

         NABoolean trySampleTableBypassForIS = useSampling && externalSampleTable == FALSE;

         // If we are considering varchar compaction and IS, get previous histogram
         // information here.
         if (varcharCompactionFeasible)
           getPreviousUECRatios(singleGroup);

         mapInternalSortTypes(singleGroup);
         Int64 maxRowsToRead = getInternalSortMemoryRequirements(TRUE);

         if (trySampleTableBypassForIS && multiGroup ) {

               if (CmpCommon::getDefault(USTAT_USE_INTERNAL_SORT_FOR_MC) == DF_ON &&
                   allGroupsFitInMemory(maxRowsToRead))
               {
                 // if both single and MC groups can fit in memory, turn on
                 // performing MC in memory flag.
                 //
                 // Do not have to set the flag for next call to this function
                 // (HSGlobalsClass::CollectStatistics()) since it is not static and
                 // therefore a new HSGlobalsClass object will be constructed.
                 // In HSGlobalsClass::HSGlobalsClass(), the flag is set to FALSE.
                 setPerformISForMC(TRUE);

               } else {
                 // othereise, don't bypass sampling for internal sort.
                 trySampleTableBypassForIS = FALSE;
               }
          }

         if (useSampling && sampleRowCount <= getMinRowCountForSample())
           internalSortWhenBetter = FALSE;                              // always use internal sort.
         else
           internalSortWhenBetter = (internalSortCQDValue == "HYBRID"); // use best method.

         if (LM->LogNeeded())
         {
            sprintf(LM->msg, "Internal sort is enabled with value (%s) and internalSortWhenBetter is (%s)",
                             internalSortCQDValue.data(), internalSortWhenBetter ? "TRUE" : "FALSE");
            LM->Log(LM->msg);
         }

         // Set CQDs for internal sort:
         // Do not limit precision, this can cause internal sort failure.
         retcode = HSFuncExecQuery("CONTROL QUERY DEFAULT LIMIT_MAX_NUMERIC_PRECISION 'OFF'");
         HSHandleError(retcode);

         // identify the columns that are used in MC
         HSColGroupStruct *sgroup = NULL;
         HSColGroupStruct *mgroup = multiGroup;
         // backup of the original order of columns in the single column group list
         HSColGroupStruct* s_group_back[singleGroupCount];
         HSColumnStruct   *col;

         if ( performISForMC() )
         {
            while (mgroup != NULL)
            {
               mgroup->mcis_colsUsedMap = new (STMTHEAP) NABitVector (STMTHEAP);
               mgroup->mcis_colsMissingMap = new (STMTHEAP) NABitVector (STMTHEAP);
               Int32 colCount = mgroup->colCount;
               Int32 pos = 0;
               for (Int32 i=0; i<colCount; i++)
               {
                  col = &mgroup->colSet[i];
                  sgroup = findGroupAndPos(col->colnum, pos);
                  sgroup->mcs_usingme++;
                  mgroup->mcis_colsUsedMap->setBit(pos);
               }
               mgroup = mgroup->next;
            }
            mgroup = multiGroup;

            sgroup = singleGroup;
            Int32 i = 0;
            while (sgroup != NULL)
            {
               s_group_back[i++] = sgroup;
               sgroup = sgroup->next;
            }
            sgroup = NULL;
         }

         // If we need UEC ratios info and we haven't already read it, do so now
         if (internalSortWhenBetter && !varcharCompactionFeasible)
           getPreviousUECRatios(singleGroup);  // used to decide when to use IS


         if ( performISForMC() )
         {
            // reorder the single group columns if any MC group can be computed in memory
            orderMCGroups(s_group_back);
            mgroup = multiGroup;
         }

         numColsToProcess = getColsToProcess(maxRowsToRead,
                                               internalSortWhenBetter,
                                               trySampleTableBypassForIS);
         NABoolean hbaseCQDsUsed = FALSE;

         if (trySampleTableBypassForIS && numColsToProcess == singleGroupCount)
           {
               // This is not performed when there are MC stats to process.
               if (LM->LogNeeded())
                 LM->Log("Internal sort: reading sample directly from base table; no sample table created");
               *hssample_table = getTableName(user_table->data(), nameSpace);
                 // sampleTblPercent and sampleRowCount may get adjusted.
               createSampleOption(optFlags & SAMPLE_REQUESTED,
                                  sampleTblPercent, *sampleOption,
                                  sampleValue1, sampleValue2);
               sampleTableUsed = FALSE;
               samplingUsed    = TRUE;

               // Set CQDs controlling HBase cache size (number of rows returned by HBase
               // in a batch) to avoid scanner timeout. Reset these after the sample query
               // has executed.
               if (isHbaseTable)
                 hbaseCQDsUsed = HSGlobalsClass::setHBaseCacheSize(sampleTblPercent);
           }
         else
           {
               if (useSampling && !externalSampleTable)
               {
                 // free column memory, to allow sample table load to use it
                 deallocatePendingMemory();

                 // create and populate the sample table
                 retcode = sampleTable.make(currentRowCountIsEstimate_,
                                            *hssample_table,
                                            actualRowCount, sampleRowCount);
                 // hssample_table assigned, actualRowCount and sampleRowCount may get adjusted.

                 HSHandleError(retcode);

                 // reallocate column memory
                 numColsToProcess = getColsToProcess(maxRowsToRead,
                                               internalSortWhenBetter,
                                               trySampleTableBypassForIS);
               }
               else if (!externalSampleTable)
               {
                 *hssample_table = getTableName(user_table->data(), nameSpace);
                 sampleRowCount = actualRowCount;
               }
           }

         while (numColsToProcess > 0)
           {
             HSCursor cursor;
             (void)getTimeDiff(TRUE);

             LM->StartTimer("RUS: Read/sort data before creating STATISTICS");

             retcode = readColumnsIntoMem(&cursor, maxRowsToRead);
             HSHandleError(retcode);
             checkTime("after reading pending columns into memory for internal sort");
             columnSeconds = getTimeDiff() / numColsToProcess;  // saved for automation


             if (sampleRowCount == 0) // cannot generate histograms
             {
               HSFuncMergeDiags(-UERR_SAMPLE_SET_IS_ZERO);
               retcode = -1;
               HSHandleError(retcode);
             }

             retcode = sortByColInMem();
             HSHandleError(retcode);

             LM->StopTimer();

             LM->StartTimer("RUS: Create statistics for internal sort");
             retcode = createStats(maxRowsToRead);
             HSHandleError(retcode);
             LM->StopTimer();

             if ( performISForMC() &&
                 !allMCGroupsProcessed(TRUE)
                )
             {
                LM->StartTimer("MC: Compute MC stats using Internal Sort");
                retcode = ComputeMCStatistics(TRUE);
                LM->StopTimer();
                if ( performISForMC() && !allMCGroupsProcessed(TRUE)
                   )
                {
                   if (LM->LogNeeded())
                   {
                      sprintf(LM->msg, "\tMC: re-invoking the orderMCGroups logic");
                      LM->Log(LM->msg);
                   }
                   orderMCGroups(s_group_back);
                   mgroup = multiGroup;
                }
             }

             numColsToProcess = getColsToProcess(maxRowsToRead, internalSortWhenBetter);
           }

         HSFuncExecQuery("CONTROL QUERY DEFAULT LIMIT_MAX_NUMERIC_PRECISION RESET");
       }

                                           /*=================================*/
                                           /*         READ / GENERATE         */
                                           /*    SINGLE-COLUMN HISTOGRAMS     */
                                           /*=================================*/
     //All columns handled using internal sort above have been marked as "processed".
     //Remaining columns are handled with a separate query for each column, which
     //does the sorting and grouping.
     //The query generated is:
     //      SELECT column, COUNT(*) FROM table GROUP BY column ORDER BY column
     //The result will always be a VARCHAR(len) CHARACTER SET UCS2
     //In most cases, this will reduce the number of fetches.

     if (CmpCommon::getDefault(USTAT_ATTEMPT_ESP_PARALLELISM) == DF_OFF)
       HSFuncExecQuery("CONTROL QUERY DEFAULT ATTEMPT_ESP_PARALLELISM 'OFF'");

     group = singleGroup;
     if (singleGroup && LM->LogNeeded())
       LM->StartTimer("Query sort/group for individual columns");
     while (group != NULL)
       {
         if (group->state == PROCESSED)
           {
             group = group->next;
             continue;
           }

         HS_ASSERT(group->state != PENDING);

         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "Using query sort/group for column %s",
                              group->colSet[0].colname->data());
             LM->Log(LM->msg);
           }

         // Surround column name with double quotes, if not already delimited.
         if (group->colNames->data()[0] == '"')
           columnName=group->colNames->data();
         else
           columnName=dblQuote+group->colNames->data()+dblQuote;

         //We must use TRANSLATE to convert non-unicode character strings
         //to unicode
         NABoolean isVarChar = group->computeAvgVarCharSize();
         // For long character strings, we'll truncate (trading off some
         // UEC accuracy for performance and also avoiding engine bugs
         // pertaining to very long varchars).
         HSColumnStruct &col = group->colSet[0];
         bool isOverSized = DFS2REC::isAnyCharacter(col.datatype) &&
               (col.length > maxCharColumnLengthInBytes);

         if (isVarChar)
          *group->clistr = "SELECT FMTVAL, SUMVAL, AVGVAL FROM (SELECT ";
         else
          *group->clistr = "SELECT FMTVAL, SUMVAL FROM (SELECT ";
         group->clistr->append(columnName.data());
         group->clistr->append(group->generateTextForColumnCast());
         if (isVarChar)
         {
           group->clistr->append(", COUNT(*), AVG(OCTET_LENGTH(");
           group->clistr->append(columnName.data());
           group->clistr->append(")) FROM ");
         }
         else
          group->clistr->append(", COUNT(*) FROM ");

         Int64 hintRowCount =  0;

         if (sampleTableUsed)
         {
           hintRowCount = sampleRowCount;
         }
         else
         {
           hintRowCount = actualRowCount;
         }

         char cardHint[50];
         sprintf(cardHint, " <<+ cardinality %e >> ", (double)hintRowCount);

         if (isOverSized)
         {
           // Stick in a nested select that truncates the string.
           // We have to do it here so the truncated string is
           // the grouping column below.
           char temp[20];  // long enough for 32-bit integer
           sprintf(temp,"%d",maxCharColumnLengthInBytes);

           group->clistr->append("(SELECT SUBSTR(");
           group->clistr->append(columnName.data());
           group->clistr->append(" FOR ");
           group->clistr->append(temp);
           group->clistr->append(") AS ");
           group->clistr->append(columnName.data());
           group->clistr->append(" FROM ");
           group->clistr->append(hssample_table->data());
           group->clistr->append(cardHint);
           group->clistr->append(") AS T1");
         }
         else
         {
           group->clistr->append(hssample_table->data());
           group->clistr->append(cardHint);
         }

         group->clistr->append(" GROUP BY ");
         group->clistr->append(columnName.data());
         group->clistr->append(" FOR READ UNCOMMITTED ACCESS) T(");
         group->clistr->append(columnName.data());
         if (isVarChar)
         {
           group->clistr->append(", FMTVAL, SUMVAL, AVGVAL) ORDER BY ");
         }
         else
          group->clistr->append(", FMTVAL, SUMVAL) ORDER BY ");
         group->clistr->append(columnName.data());

         cursor = new(STMTHEAP) HSCursor;

         char msg_buf[1000];
         sprintf(msg_buf, "RUS: create Single-column stats: fetchBoundaries() for %s",
                 group->colSet[0].colname->data());
         LM->StartTimer(msg_buf);
         (void)getTimeDiff(TRUE);
         retcode = cursor->fetchBoundaries(group,
                                           sampleRowCount,
                                           intCount,
                                           samplingUsed);
         group->colSecs = getTimeDiff();
         LM->StopTimer();
         delete cursor;
         HSHandleError(retcode);

         //10-030702-7560: There is no need to continue processing columns when
         //an empty table is detected.
         if (sampleRowCount == 0)
           {
             actualRowCount = 0;
             break;
           }
                                           /*=================================*/
                                           /*       FIX SAMPLING COUNTS       */
                                           /* sampled UEC -> est UEC          */
                                           /* sampled ROWCOUNT -> est ROWCOUNT*/
                                           /*=================================*/
         if (samplingUsed && sampleRowCount > 0 && actualRowCount > sampleRowCount)
           {
             LM->StartTimer("fix sample row counts");
             retcode = FixSamplingCounts(group);
             HSHandleError(retcode);
             LM->StopTimer();
             if (group->groupHist) group->groupHist->deleteFiArray();
           }
         group = group->next;
       }

     // If the current row count for an Hbase table is an estimate, then
     // actualRowCount is the estimate of the row count given by HBase. This
     // estimate can sometimes be inaccurate. Now that we have actually read
     // the data, we can improve the estimate. If we used sampling, we can
     // divide our sampleRowCount by the sampling ratio. If we did not use
     // sampling, the sampleRowCount is the true row count.

     // Note: After a recent code change, we no longer do an estimate
     // when not doing sampling. So the "else" case below is actually dead
     // code. But I'm leaving the code here on the chance that we change
     // our minds about estimates in the non-sampling case.

     if (isHbaseTable && currentRowCountIsEstimate_)
       {
         if (samplingUsed)
           {
             HS_ASSERT(sampleTblPercent > 0 && sampleTblPercent <= 100.00);
             Int64 newActualRowCount = (Int64)((100 * sampleRowCount) / sampleTblPercent);
             if (LM->LogNeeded())
               {
                 sprintf(LM->msg, "Re-estimating actualRowCount (was " PF64 ") as " PF64,
                                  actualRowCount,newActualRowCount);
                 LM->Log(LM->msg);
               }
             actualRowCount = newActualRowCount;
           }
         else
           {
             if (LM->LogNeeded())
               {
                 sprintf(LM->msg, "Correcting actualRowCount (was " PF64 ") from sampleRowCount (" PF64 ")",
                                  actualRowCount,sampleRowCount);
                 LM->Log(LM->msg);
               }
             actualRowCount = sampleRowCount;
           }
       }

     if (singleGroup && LM->LogNeeded())
       LM->StopTimer();

                                           /*=================================*/
                                           /*             COMPUTE             */
                                           /*     MULTI-COLUMN HISTOGRAMS     */
                                           /*=================================*/
     //10-040220-3429
     //Since Multi-Column statistics are dependent on Single-Column statistics,
     //we must make sure that Single-Column statistics exist. This can easily be
     //done by checking that the rowcount is greater than zero.
     if (multiGroup != NULL && actualRowCount > 0)
       {


         if (!allMCGroupsProcessed())
         {
            LM->StartTimer("MC: Compute MC stats using SQL");
            retcode = ComputeMCStatistics();
            LM->StopTimer();
         }
         else if (LM->LogNeeded())
         {
            LM->Log("MC: No MCs to compute using SQL, all processed using Internal Sort");
         }


         HSHandleError(retcode);

         LM->StartTimer("MC: fix MC stats");

         if (samplingUsed && sampleRowCount > 0 && actualRowCount > sampleRowCount)
           {
             group = multiGroup;
             while (group != NULL)
             {
               retcode = FixSamplingCounts(group);
               HSHandleError(retcode);
               if (group->groupHist) group->groupHist->deleteFiArray();
               group = group->next;
             }
           }
         LM->StopTimer();
       }

     if (CmpCommon::getDefault(USTAT_ATTEMPT_ESP_PARALLELISM) == DF_OFF)
       HSFuncExecQuery("CONTROL QUERY DEFAULT ATTEMPT_ESP_PARALLELISM RESET");

     if (samplingUsed || (optFlags & ROWCOUNT_OPT))
       {
         //In combination of SAMPLE and "SELECT <col>, COUNT(*)", we may read
         //more rows than actually specified. We need to make sure that the
         //rowcount is no less than the number of rows actually read.
         //For example: The table has 18 rows. The statement
         //"SAMPLE 10 ROWS SET ROWCOUNT 10", will read 100% of table. Since the
         //rowcount was specified, actualrowcount=10. Although, the actual
         //number of rows read is 18(samplerowcount). Rowcount must always be
         //greater than or equal to samplerowcount.
         actualRowCount = MAXOF(actualRowCount, sampleRowCount);
       }

     // Reset sampling variables.
     sampleTableUsed = FALSE;
     samplingUsed = FALSE;

     return retcode;
   }

 // Do the setup work for IUS, and call either doFullIUS() or prepareToUsePersistentSample(),
 // depending on whether the USTAT_INCREMENTAL_UPDATE_STATISTICS cqd is ON or
 // SAMPLE. In the latter case prepareToUsePersistentSample() is called to update
 // the persistent sample incrementally, and then the normal Update Stats algorithm
 // executes using the persistent sample table.
 //
 // The 'done' parameter is returned with a value of TRUE if stats are  completely
 // handled by doFullIUS(). If prepareToUsePersistentSample() is called instead,
 // or if doFullIUS() is unable to incrementally update the stats for one or more
 // columns (e.g., shape test failure), 'done' will be set to FALSE.
 Lng32 HSGlobalsClass::doIUS(NABoolean& done)
 {
   done = FALSE;  // set to TRUE if IUS successfully updates the stats for all columns
   Lng32 retcode = 0;

   // Make sure the Where clause doesn't contain any constructs we don't allow
   // in the context of an IUS statement.
   retcode = validateIUSWhereClause();
   HSHandleError(retcode);

   retcode = begin_IUS_work();
   HSHandleError(retcode);

   Int64 currentSampleSize = 0;
   Int64 futureSampleSize = 0;

   retcode = computeSampleSizeForIUS(currentSampleSize, futureSampleSize);
   HSHandleErrorIUS(retcode);

   DefaultToken iusOption = CmpCommon::getDefault(USTAT_INCREMENTAL_UPDATE_STATISTICS);
   if (iusOption == DF_ON)
     return doFullIUS(currentSampleSize, futureSampleSize, done);
   else if (iusOption == DF_SAMPLE)
     // Leave 'done' FALSE; prepareToUsePersistentSample() updates the persistent sample
     // table in preparation for use by RUS.
     return prepareToUsePersistentSample(currentSampleSize, futureSampleSize);
   else
     {
       // Exception will be thrown, ~HSGlobalsClass will call end_IUS_work().
       HS_ASSERT(false);
       return -1;  // avoid 'no return' warning
     }
 }

 // Try to incrementally update existing histograms using in-memory tables and
 // CBFs. If reversion to RUS is required for one or more columns, the 'done'
 // output parameter will be set to FALSE.
 Lng32 HSGlobalsClass::doFullIUS(Int64 currentSampleSize,
                                 Int64 futureSampleSize,
                                 NABoolean& done)
 {
   done = FALSE;  // unless IUS handles all columns
   HSLogMan* LM = HSLogMan::Instance();
   Lng32 retcode = 0;
   ISMemPercentage_ = (float)CmpCommon::getDefaultNumeric(USTAT_IS_MEMORY_FRACTION);

   // Setup the memory requirement for singlegroup in each memNeeded field,
   // Since singleGroup will receive the computed new stats, we compute
   // the memory needs by using futureSampleSize.
   mapInternalSortTypes(singleGroup);
   getMemoryRequirements(singleGroup, futureSampleSize);

   // =====================================================================
   // create the inMemory delete table, the cstr also setups the memory
   // requirement utilizing the currentSampleSize as #rows.
   // =====================================================================
   iusSampleDeletedInMem = new(STMTHEAP)
                              HSInMemoryTable(*hssample_table,
                                          getWherePredicateForIUS(),
                                          currentSampleSize
                                         );

   // =====================================================================
   // Similarly, create the inMemory insert table.
   // =====================================================================
   NAString sampleTable_I(*hssample_table);
   sampleTable_I.append("_I");

   iusSampleInsertedInMem = new(STMTHEAP)
                             HSInMemoryTable(sampleTable_I,
                                          getWherePredicateForIUS(),
                                          futureSampleSize,// worse case
                                          sampleRateAsPercetageForIUS);

   // =====================================================================
   // Create and populate the sample _I table
   // =====================================================================

   if (!sampleIExists_)
     {
       retcode = create_I(*hssample_table);
       HSHandleError(retcode);
       sampleIExists_ = TRUE;  // so we remember to drop it
     }

   if (!sample_I_generated)
     {
       retcode = generateSampleI(currentSampleSize, futureSampleSize);
       HSHandleError(retcode);
       sample_I_generated = TRUE;
     }

   // =====================================================================
   // Find out which group has persistent CBFs and set the delayedRead flag
   // for it. Do it for all groups.
   // =====================================================================
   detectPersistentCBFsForIUS(*hssample_table, singleGroup);


   Int32 colsSelected = 0;
   NABoolean ranOutOfMem = FALSE;

   while ( moreColsForIUS() > 0 ) {

      // Select a set of columns for IUS, based on the availability
      // of memory. Selected columns will be marked in PENDING state.
      // The number of columns selected is returned.
      //
      // If a particular column has persistent CBF, its column
      // structure's delayedRead is set to TRUE.
      //
      retcode = selectIUSBatch(currentSampleSize, futureSampleSize, ranOutOfMem, colsSelected);
      HSHandleErrorIUS(retcode);
      checkTime("after selecting batch of columns for IUS");

      //
      // Require at least one column in the persistent sample table
      // to be read into memory in one batch
      //
      if ( colsSelected == 0 ) {
        if ( ranOutOfMem ) {
          if (LM->LogNeeded())
            {
              // only do the warning diagnostic if logging is enabled
              diagsArea << DgSqlCode(UERR_WARNING_IUS_INSUFFICIENT_MEMORY)
                        << DgInt0(moreColsForIUS());
            }
        } else {
          if (LM->LogNeeded())
            LM->Log("Empty IUS batch, not because of memory.");
        }
        break;  // Let RUS handle the rest
      }


      // process column groups that are in PENDING state
      // Data from columns with delayedRead set to FALSE (i.e., no
      // corresponding persistent CBFs) will be read in.
      //
      // The rows to be deleted in one shot later on will be read in
      // from the sample table.
      //
      retcode = CollectStatisticsForIUS(currentSampleSize, futureSampleSize);
      HSHandleErrorIUS(retcode);

      //
      // Fall back to let internal sort based RUS to take care of any
      // groups failing to be updated via IUS. These groups are in PENDING
      // state. If all groups are processed, no more read and we are done!
      //
      HSColGroupStruct *group = singleGroup;
      Int32 cols = 0;
      Int32 colsToRead = 0;

      // First mask out those groups that already have data read in.
      // These groups should have delayedRead flag set to FALSE.
      //
      // We do need to properly merge the data from S(i-1), D and I
      // together, so that group->data points at the merged data and
      // group->nextdata points at the end+1 of the merged data.
      //
      // The merge algorithm:
      //
      // Allocate a temp. buffer of size (|S(i-1)| + |I|)
      // For all data item v in S(i-1) and I do
      //    if ( v in cbf ) {
      //       append data to the temp. buffer
      //       remove one instance of v from cbf
      //    }
      // delete group->data
      // set group->data = temp. buffer
      // set group->nextdata = temp. buffer's size + 1

      while (group) {
        if (group->state==PENDING) {

           if ( group->delayedRead == FALSE ) {
              group->state=SKIP;
           } else
              colsToRead++;

           cols++;
        }
        group = group->next;
      }


      if ( cols > 0 ) {

        if (LM->LogNeeded())
          LM->StartTimer("IUS: process failed groups with RUS");

        // First we need to read in column data if it has not been
        // read previously (i.e., those with delayedRead is TRUE, or
        // it has persistent CBF).
        if ( colsToRead > 0 )
        {
           HSCursor cursor;
           // Read from the persistent sample table and use smplGroup to
           // hold the data read. All columns in PENDING state
           // will be read in.
           retcode = readColumnsIntoMem(&cursor, currentSampleSize);

           HSHandleErrorIUS(retcode);
           checkTime("after reading pending columns from persistent sample table into memory for IUS->RUS reversion");

        }

        group = singleGroup;
        while (group) {
          if (group->state==SKIP) {
             group->state=PENDING;
          }
          group = group->next;
        }

        // Fill each group (in PENDING state)'s data area with data merged from
        // cbf, S(I-1) and I.
        retcode = mergeDatasetsForIUS();
        HSHandleErrorIUS(retcode);
        checkTime("after merging datasets for IUS->RUS reversion");

        group = singleGroup;
        while (group) {
          if (group->state==PENDING) {

             // Delete the histogram allocated for IUS. The RUS step
             // below will recompute groupHist.
             delete group->groupHist;
             group->groupHist = NULL;
          }
          group = group->next;
        }


        // Remove all persistent CBFs with PENDING state
        // because RUS may generates a histogram with different
        // #intervals than that recorded in CBF! If the CBFs
        // are left undeleted, the encoded intervals (as
        // # of buckets) in CBF can be in conflict with
        // the actual # of intervals computed by RUS.
        retcode = deletePersistentCBFsForIUS(*hssample_table, singleGroup, PENDING);
        HSHandleErrorIUS(retcode);

        retcode = sortByColInMem();
        HSHandleErrorIUS(retcode);

        retcode = createStats(0 /* dummy argument */);
        HSHandleErrorIUS(retcode);

        if (LM->LogNeeded())
          LM->StopTimer();
      }
   }  // while ( moreColsForIUS() > 0 )

   // The _I table can be dropped after using it to update the persistent sample
   // table, which must be done before doing RUS on any unprocessed columns (RUS
   // will use the updated persistent sample).
   retcode = UpdateIUSPersistentSampleTable(currentSampleSize, futureSampleSize, sampleRowCount);
   HSHandleErrorIUS(retcode);
   if (sampleIExists_) {
     retcode = drop_I(*hssample_table);
     HSHandleErrorIUS(retcode);
     sampleIExists_ = FALSE;  // only try to drop it once
   }

   Int32 iusUnprocessed  = 0;
   // Reverse the NO_STATS state to UNPROCESSED and count
   // total unprocessed
   HSColGroupStruct *group = singleGroup;
   while (group != NULL) {
     if (group->state == NO_STATS ) {
        group->state = UNPROCESSED;
        iusUnprocessed ++;
     } else
     if (group->state == UNPROCESSED )
        iusUnprocessed ++;

       group = group->next;
   }

   // Leave the 'done' parameter FALSE so we continue with the RUS code upon return
   // if there are unprocessed columns (not enough memory or no prior stats as a
   // base to compute IUS), or if there are MCs to process.
   if ( iusUnprocessed > 0 )  {
     // Remove all persistent CBFs with UNPROCESSED state because RUS may generate
     // a histogram with different #intervals than that recorded in CBF! If the CBFs
     // are left undeleted, the encoded intervals (as # of buckets) in CBF can be in
     // conflict with the actual # of intervals computed by RUS.
     retcode = deletePersistentCBFsForIUS(*hssample_table, singleGroup, UNPROCESSED);
     HSHandleErrorIUS(retcode);
   } else if (multiGroup) { // not done if there are MCs to process
   } else {
     done = TRUE;  // no need to use RUS code
   }

   return retcode;
 }

 // This function makes all preparations for doing RUS using an updated IUS
 // persistent sample table. The sample table is updated, and obsolete CBFs
 // are discarded,
 Lng32 HSGlobalsClass::prepareToUsePersistentSample(Int64 currentSampleSize,
                                                    Int64 futureSampleSize)
 {
   Lng32 retcode = 0;
   retcode = UpdateIUSPersistentSampleTable(currentSampleSize, futureSampleSize, sampleRowCount);
   HSHandleErrorIUS(retcode);

   // If there are existing CBFs, they will be obsolete once the current operation
   // completes.
   retcode = deletePersistentCBFsForIUS(*hssample_table, singleGroup, UNPROCESSED);
   HSHandleErrorIUS(retcode);

   return retcode;
 }

 //
 // A help function to generate a SQL timestamp constant. Example: '2012-01-01 23:59:00'
 //
 // bdt: input argument representing a break-down time value
 // timestamp: output argument representing the SQL timestamp constant
 //
 void genSQLTimestampConstant(struct tm * bdt, NAString& timestamp)
 {
   char buf[100];

   // year
   str_itoa(bdt->tm_year+1900, buf); timestamp = buf;

   timestamp += "-";

   // tm_mon is in  [0, 11]
   if ( bdt->tm_mon < 9 ) timestamp += "0"; // < rather than <= since we add one in the next line
   str_itoa(bdt->tm_mon+1, buf); timestamp += buf;

   timestamp += "-";

   // tm_mday is in [1, 31]
   if ( bdt->tm_mday <= 9 ) timestamp += "0";
   str_itoa(bdt->tm_mday, buf); timestamp += buf;

   timestamp += " ";

   // tm_hour is in [0, 23]
   if ( bdt->tm_hour <= 9 ) timestamp += "0";
   str_itoa(bdt->tm_hour, buf); timestamp += buf;

   timestamp += ":";

   // tm_min is in [0, 59]
   if ( bdt->tm_min <= 9 ) timestamp += "0";
   str_itoa(bdt->tm_min, buf); timestamp += buf;

   timestamp += ":";

   // tm_sec is in [0, 59]
   if ( bdt->tm_sec <= 9 ) timestamp += "0";
   str_itoa(bdt->tm_sec, buf); timestamp += buf;

 }

 void genArkcmpInfo(NAString& nidpid)
 {
   char buf[100];

   str_itoa(((NAClusterInfoLinux*)gpClusterInfo)->get_nid(), buf);
   nidpid = buf;

   nidpid += ":";

   str_itoa(((NAClusterInfoLinux*)gpClusterInfo)->get_pid(), buf);
   nidpid += buf;

   nidpid += " (nid:pid)";
 }

 //
 // This method starts a long-running (relatively) transaction to serialize IUS work
 // against a target table. The transaction is established by updating the row in the
 // PERSISTENT_SAMPLES table about the persistent sample table used by the IUS:
 //   1. UPDATE_DATE field is populated with the current timestamp;
 //   2. UPDATER_INFO field is populated with the SQ node ID and process ID
 //      of the tdm_arkcmp process performing the IUS work
 // The method will return 0 after the above successful updates, indicating that the IUS
 // work can proceed.
 //
 // The transaction can fail to establish when another long-running IUS transaction
 // is working against the same target table. The condition can be detected by
 // querying the UPDATE_DATA field about the sample table. A non-zero timestamp
 // value (call it P1) indicates an on-going IUS transaction. When P1
 // is sufficiently close to the current timestamp P2 (P2-P1 <=
 // CQD(USTAT_IUS_MAX_TRANSACTION_DURATION)), the ongoing transaction is
 // considered legitimate, and the current call to the method will return an error
 // indicating that a concurrent IUS is in progress.
 // ius_update_history_buffer will be filled with the string read from the
 // UPDATER_INFO column.
 //
 // When P2-P1 > CQD(USTAT_IUS_MAX_TRANSACTION_DURATION), the on-going transaction is
 // considered over-due and will be discarded. The method proceeds as if there was
 // no IUS transaction pending (see the 1st paragraph above) and will return a retcode
 // of 0, indicating success.
 //
 // The querying of these two fields against PERSISTENT_SAMPLE table must be
 // protected by a serializable transaction.
 //
 // The CQD USTAT_IUS_MAX_TRANSACTION_DURATION specifies the max transaction
 // duration allowed with the unit in minutes. The default value is 720 minutes (12 hours).

 Lng32 HSGlobalsClass::begin_IUS_work()
 {
    sampleIExists_ = FALSE;  // keep track of whether a _I table needs to be dropped

 #ifdef _DEBUG
    if (CmpCommon::getDefault(USTAT_IUS_NO_BLOCK) == DF_ON)
      return 0;
 #endif

    HSPersSamples *sampleList = HSPersSamples::Instance(objDef->getCatName(),
                                                        objDef->getSchemaName());
    if ( !sampleList ) return -1;

    Int64 updTimestamp = 0;

    HSTranMan *TM = HSTranMan::Instance();
    TM->Begin("READ AND UPDATE THE UPDATE DATE AND HISTORY from PERSISTENT SAMPLE TABLE");

    char ius_update_history_buffer[129];
    Lng32 retcode =
      sampleList->readIUSUpdateInfo(objDef, ius_update_history_buffer, &updTimestamp);
    if (retcode == 100)
      {
        HSFuncMergeDiags(- UERR_IUS_NO_PERSISTENT_SAMPLE,
                         objDef->getObjectFullName().data());
        retcode = -1;
        HSHandleError(retcode);
      }
    else
      {
        HSHandleError(retcode);
      }

    time_t t;

    //
    // A timestamp of 0 means the time stored is the epoch time: 00:00:00 on
    // January 1, 1970, Coordinated Universal Time (UTC), which is a value
    // indicating no IUS operation is in progress.
    //

    time(&t); // Obtain the current time as a timestamp since epoch

    if ( updTimestamp != 0 ) {
       //
       // Assign the value in seconds as the column UPDATE_DATE in
       // CAT.PUBLIC_ACCESS_SCHEMA.PERSISTENT_SAMPLES has no fractional part
       // (defined as TIMESTAMP(0)).
       //
       Int32 maxDeltaSeconds =
         ActiveSchemaDB()->getDefaults().getAsULong(USTAT_IUS_MAX_TRANSACTION_DURATION) * 60;

       if ( (Int64)(t) - updTimestamp < maxDeltaSeconds ) {
         // A legitimate instance of IUS is running. Return error.
         diagsArea << DgSqlCode(-UERR_IUS_IN_PROGRESS)
                   << DgString0(ius_update_history_buffer);
         return -UERR_IUS_IN_PROGRESS;
       }
    }

    // If we reach here, it means either there is no IUS operation in progress or
    // a previous IUS operation has spent more time than USTAT_IUS_MAX_TRANSACTION_DURATION,
    // we will update the PST entry with my IUS instance's information and the current
    // timestamp and proceed normally.

    // Break down the current timestamp t into a structure with year, month, day etc
    struct tm * bdt = gmtime(&t);

    NAString updTimestampStr;
    genSQLTimestampConstant(bdt, updTimestampStr);

    NAString nid_pid_str;
    genArkcmpInfo(nid_pid_str);

    retcode =
      sampleList->updIUSUpdateInfo(objDef,
                                   (char*)nid_pid_str.data(),
                                   (char*)updTimestampStr.data(),
                                   0 /* don't write where condition now */);

    if (retcode == 100)
      {
        HSFuncMergeDiags(- UERR_IUS_NO_PERSISTENT_SAMPLE,
                         objDef->getObjectFullName().data());
        retcode = -1;
      }

    HSHandleError(retcode);

    //
    // If we reach here, we have successfully stored the bdt and our process info
    // into the UPDATE_DATE and UPDATER_INFO columns of SB_PERSISTENT_SAMPLES.
    //
    PSRowUpdated = TRUE;
    return 0;
 }

 //
 // This method completes the relatively long-running transaction by updating the
 // row describing the persistent sample table in PERSISTENT_SAMPLE table
 // as follows.
 //
 //   1. UPDATE_DATE field is reset to a timestamp representing epoch time;
 //   2. UPDATER_INFO field is reset to an empty string.

 Lng32 HSGlobalsClass::end_IUS_work()
 {
    Lng32 retcode = 0;
    if (sampleIExists_)
      {
        retcode = drop_I(*hssample_table);
        // ignore retcode; we want to try the rest of this method as well
        sampleIExists_ = FALSE;
      }

 #ifdef _DEBUG
    if (CmpCommon::getDefault(USTAT_IUS_NO_BLOCK) == DF_ON)
      return 0;
 #endif

    HSPersSamples *sampleList = HSPersSamples::Instance(objDef->getCatName(),
                                                        objDef->getSchemaName());
    if ( !sampleList ) return -1;

    // The epoch time
    time_t t = 0;

    // Break down the timestamp t into a structure with year, month, day etc.
    // Should be '1970-01-01 00:00:00'!
    struct tm * bdt = gmtime(&t);

    NAString updTimestampStr;
    genSQLTimestampConstant(bdt, updTimestampStr);

    retcode =
      sampleList->updIUSUpdateInfo(objDef,
                                   (char*)"",
                                   (char*)updTimestampStr.data(),
                                   getWherePredicateForIUS(),
                                   PST_IUSrequestedSampleRows_,
                                   PST_IUSactualSampleRows_);
    HSHandleError(retcode);

    return 0;
 }

 Int32 HSGlobalsClass::moreColsForIUS()
 {
    Int32 count = 0;
    HSColGroupStruct *group = singleGroup;

    while (group != NULL)
     {
       if (group->state == UNPROCESSED)
           count++;

        group = group->next;
     }
    return count;

 }

 //
 // Prepare for IUS by performing the following tasks:
 //
 //   1. read the persistent samples table to find the
 //      #sample rows, the requested rows for IUS, and sample rate
 //   2. Find the size of the sample table of the previous IUS operation
 //      and assign it to argument 'currentSampleSize'
 //   2. Compute the size of the sample table for the new IUS operation
 //      and assign it to argument 'futureSampleSize'
 //   3. set the memNeeded field for each group use the size from 2)
 //
 Lng32 HSGlobalsClass::computeSampleSizeForIUS(Int64& currentSampleSize, Int64& futureSampleSize)
 {
   Lng32 retcode = 0;
   Int64 requestedRowsForIUS = 0;
   currentSampleSize = 0;

   if ( getPersistentSampleTableForIUS(*hssample_table,
                   requestedRowsForIUS, currentSampleSize,
                   sampleRateAsPercetageForIUS) )
     {
       // found the persistent sample table name, actual row-count, sample row-count,
       // and sample rate from persistent_samples table.
       externalSampleTable = TRUE;
       sampleTableUsed     = TRUE;
       samplingUsed        = TRUE;
     }
   else
     {
       HSFuncMergeDiags(- UERR_IUS_NO_PERSISTENT_SAMPLE,
                         objDef->getObjectFullName().data());
       retcode = -1;
       HSHandleError(retcode);
     }

    sampleRowCount = currentSampleSize;

    // Meta-info about the previous IUS iteration is found, compute the # of rows
    // for the new IUS iteration. "actualRowCount" is the size of the
    // source table now. Call it futureSampleSize.
    futureSampleSize = (Int64)(sampleRateAsPercetageForIUS * actualRowCount);

    // keep the sample table monotonically increasing.
    if ( futureSampleSize < currentSampleSize )
       futureSampleSize = currentSampleSize;

    return 0;
 }

 // Before we have the PERSISTENT_DATA table available to us, we will
 // save the CBFs as binary files on disk. One CBF maps to one binary file.
 // The path of the directory for these files is specified in CQD
 // USTAT_IUS_PERSISTENT_CBF_PATH, and the cbf file name is
 // sampleTableName + '.' + 'colName'. This function builds the common initial
 // text of the path for all columns in the same table, and assigns it to the
 // output parameter filePrefix.
 void HSGlobalsClass::getCBFFilePrefix(NAString& sampleTableName, NAString& filePrefix)
 {
   filePrefix = ActiveSchemaDB()->getDefaults().getValue(USTAT_IUS_PERSISTENT_CBF_PATH);
   filePrefix.append("/")
             .append(sampleTableName)
             .append(".");
 }

 void
 HSGlobalsClass::detectPersistentCBFsForIUS(NAString& sampleTableName,
                                            HSColGroupStruct *group)
 {
    NAString cbfFilePrefix;
    getCBFFilePrefix(sampleTableName, cbfFilePrefix);
    struct stat sts;
    while (group) {
       NAString cbfFile(cbfFilePrefix);
       cbfFile.append(group->cbfFileNameSuffix());

       if (stat(cbfFile, &sts) == -1 && errno == ENOENT)
         group->delayedRead = FALSE;
       else
         group->delayedRead = TRUE;

       group = group->next;
    }
 }

 Lng32 HSGlobalsClass::prepareForIUSAlgorithm1(Int64& rows)
 {
   Lng32 retcode = 0;

   HSLogMan *LM = HSLogMan::Instance();

   if (LM->LogNeeded())
     {
       char intStr[30];
       convertInt64ToAscii(rows, intStr);
       sprintf(LM->msg, "IUS in ::prepareForIUS(): |S(i-1)|=%s rows", intStr);
       LM->Log(LM->msg);
       LM->StartTimer("IUS: compute S(i-1)-D");
     }


   // Update the sample table in two separate transactions.
   Int64 xRows;

   // first delete the old rows

   HSHandleError(retcode);

   // temp. for now to generate the delQuery, set #rows to 1
   iusSampleDeletedInMem = new(STMTHEAP)
                               HSInMemoryTable(*hssample_table,
                                                getWherePredicateForIUS(),
                                                1 // rows
                                               );

   NABoolean transactional = (CmpCommon::getDefault(USTAT_DELETE_NO_ROLLBACK) == DF_OFF);

   NAString delQuery;
   generateIUSDeleteQuery(*hssample_table, delQuery, transactional);

   if (transactional)
     {
       retcode = HSFuncExecTransactionalQueryWithRetry(delQuery, -UERR_INTERNAL_ERROR,
                               &xRows,"IUS S(i-1)-D operation",
                               NULL, NULL);
     }
   else
     {
       retcode = HSFuncExecQuery(delQuery, -UERR_INTERNAL_ERROR,
                               &xRows,"IUS S(i-1)-D operation",
                               NULL, NULL);
     }

   HSHandleError(retcode);

   if (LM->LogNeeded())
      LM->StopTimer();

   rows -= xRows;

    if (LM->LogNeeded())
     {
       char intStr[30];
       convertInt64ToAscii(rows, intStr);
       sprintf(LM->msg, "IUS in ::prepareForIUS(): |S(i-1)-D|=%s rows", intStr);
       LM->Log(LM->msg);
     }


   if (LM->LogNeeded())
      LM->StartTimer("IUS: compute S(i-1) + I");

   { // start a new scope for the trasaction

     HSTranController TC("IUS: Update S with I", &retcode);
     HSHandleError(retcode);

     // temp. for now to generate the insQuery, set #rows to 1
     iusSampleInsertedInMem = new(STMTHEAP)
                               HSInMemoryTable(*user_table,
                                                getWherePredicateForIUS(),
                                                1, // rows,
                                                sampleRateAsPercetageForIUS);

     NAString insQuery;
     iusSampleInsertedInMem->generateInsertQuery(*hssample_table, *user_table, insQuery, FALSE);

     // Note that we don't retry the insert
     retcode = HSFuncExecQuery(insQuery, -UERR_INTERNAL_ERROR,
                                &xRows, "IUS S(i-1)-D+I operation",
                                NULL, NULL);
     HSHandleError(retcode);
   }

   if (LM->LogNeeded())
      LM->StopTimer();


   rows += xRows;

    if (LM->LogNeeded())
     {
       char intStr[30];
       convertInt64ToAscii(rows, intStr);
       sprintf(LM->msg, "IUS in ::prepareForIUS(): size of |Si|=%s rows", intStr);
       LM->Log(LM->msg);
     }

   return 0;
 }

 static Lng32 create_I(NAString& sampTblName)
 {
   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
      LM->StartTimer("IUS: create _I table");

   NAString createI("create table ");
   createI += sampTblName;
   createI += "_I LIKE ";
   createI += sampTblName;
   createI += " WITH PARTITIONS";
   Lng32 retcode = HSFuncExecTransactionalQueryWithRetry(createI, -UERR_INTERNAL_ERROR,
                                   NULL, "IUS create I",
                                   NULL, NULL);
   if (LM->LogNeeded())
      LM->StopTimer();

   HSHandleError(retcode);
   return retcode;
 }


 Lng32 HSGlobalsClass::generateSampleI(Int64 currentSampleSize,
                                       Int64 futureSampleSize)
 {
   Lng32 retcode = 0;
   Int64 xRows;

   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
      LM->StartTimer("IUS: select-insert data set I");

     // performing:
     //
     //      upsert using load into <sample_I>
     //        (select * from <sourceTable> where <where> sample);
     //

     NAString sampleTable_I(*hssample_table);
     sampleTable_I.append("_I");

     HSTranMan *TM = HSTranMan::Instance();
     NABoolean transStarted = (TM->Begin("IUS clean data set I") == 0);

     NAString insertSelectIQuery;
     iusSampleInsertedInMem->generateInsertSelectIQuery(sampleTable_I,
                         *user_table, insertSelectIQuery,
                         hasOversizedColumns, objDef,
                         currentSampleSize, futureSampleSize,
                         actualRowCount);

     NABoolean needEspParReset = setEspParallelism(objDef);
     // note that we can't do a retry on non-transactional upsert using load + sample
     // note also the most likely error is a bad WHERE clause
     retcode = HSFuncExecQuery(insertSelectIQuery,
                               -UERR_IUS_BAD_WHERE_CLAUSE, &xRows,
                               "IUS data set I creation",
                               NULL, NULL,
                               0, TRUE);  // check for MDAM usage

     if (needEspParReset)
       resetEspParallelism();

     if (retcode) TM->Rollback();

     HSHandleError(retcode);

     TM->Commit();

   if (LM->LogNeeded())
     {
       LM->StopTimer();
       sprintf(LM->msg, "the size of data set I is " PF64" rows", xRows);
       LM->Log(LM->msg);
     }

   return 0;
 }

 static Lng32 drop_I(NAString& sampTblName)
 {
   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
     LM->StartTimer("IUS: drop _I table");

   NAString cleanupI("drop table if exists ");
   cleanupI.append(sampTblName).append("_I");
   Lng32 retcode = HSFuncExecTransactionalQueryWithRetry(cleanupI, -UERR_INTERNAL_ERROR,
                                   NULL, "IUS cleanup I",
                                   NULL, NULL);
   if (LM->LogNeeded())
     LM->StopTimer();
   HSHandleError(retcode);
   return retcode;
 }

 Lng32 HSGlobalsClass::CollectStatisticsForIUS(Int64 currentSampleSize,
                                               Int64 futureSampleSIze)
 {
   Lng32 retcode = 0;
   Int64    xRows = 0;

   HSLogMan *LM = HSLogMan::Instance();

   NABoolean havePending = FALSE;
   HSColGroupStruct *group = singleGroup;
   while (group)
     {
       if (group->state == PENDING)
         {
           if (group->delayedRead)
             group->state = SKIP; // temp. set so that the column data is not to be read
           else
             havePending = TRUE;
         }
       group = group->next;
     }

   // Only read in Si if there is at least one column in this batch that doesn't
   // already have a persistent CBF.
   if (havePending)
     {
       if (LM->LogNeeded())
         LM->StartTimer("IUS: read in Si");

       // Populate the data areas for the PENDING columns from the persistent
       // sample table.
       HSCursor cursor;  // on block exit, dtor will close/dealloc stmt and descriptors
       retcode = readColumnsIntoMem(&cursor, currentSampleSize);
       HSHandleError(retcode);
       checkTime("after reading pending columns into memory for IUS");

       if (LM->LogNeeded())
         LM->StopTimer();
     }
   else if (LM->LogNeeded())
     LM->Log("IUS: skipped reading in Si; delayedRead true for all columns in batch");

    // restore the state field to PENDING for any skipped groups.
    group = singleGroup;
    while (group) {
        if (group->delayedRead && group->state == SKIP)
            group->state = PENDING;
        group = group->next;
    }

   // Read in CBFs for groups that are PENDING and delayedRead
   retcode = readCBFsIntoMemForIUS(*hssample_table,singleGroup);
   HSHandleError(retcode);
   checkTime("after reading CBFs into memory for IUS");


   if (LM->LogNeeded())
     LM->StartTimer("IUS: read in data set D");

   // ================================================================
   // This section of code is only needed to support Algorithm 2.
   // ================================================================
   //
   // First loading data set D
   NAString selectDQuery;
   iusSampleDeletedInMem->generateSelectDQuery(*hssample_table,
                                                selectDQuery);

   retcode = iusSampleDeletedInMem->populate(selectDQuery);
   HSHandleError(retcode);

   if (LM->LogNeeded())
     LM->StopTimer();


   if (LM->LogNeeded())
     LM->StartTimer("IUS: read in data set I");

   // Next loading data set I. The I sample was generated by our caller.

   NAString selectIQuery;
   iusSampleInsertedInMem->generateSelectIQuery(*hssample_table,
                                                selectIQuery);
   retcode = iusSampleInsertedInMem->populate(selectIQuery);
   HSHandleError(retcode);
   checkTime("after populating tables for IUS");

   if (LM->LogNeeded())
     LM->StopTimer();

   //
   // ================================================================
   // End of the section of code is only needed to support Algorithm 2.
   // ================================================================


   if (LM->LogNeeded())
     LM->StartTimer("IUS: compute CBFs and estimate UECs");

   //
   // Handle incremental update of the histograms selected.
   // Only those columns marked as STATE == PENDING will be processed.
   // Updated histograms are written to the histograms/intervals table inside
   // indirect calling function UpdateStats().
   //
   retcode = incrementHistograms();
   HSHandleError(retcode);
   checkTime("after incrementing histograms for IUS");

   if (LM->LogNeeded())
     LM->StopTimer();

   // Write CBFs for groups that are PROCESSED and delayedRead back to disk.
   writeCBFstoDiskForIUS(*hssample_table, singleGroup);

   return retcode;
 }

 // Returns the table component of the fully qualified name passed in, using the
 // catalog and schema name from tblDef to determine where it starts (the table
 // is in the same schema as the one referenced by tblDef). This avoids problems
 // in parsing the fully qualified name posed by the possibility of periods within
 // delimited identifiers.
 static const char* extractTblName(const NAString& fullyQualifiedName,
 		                          HSTableDef* tblDef)
 {
   Lng32 tblNameOffset = tblDef->getCatName().length() +
                         tblDef->getSchemaName().length() +
                         2;  // 2 dot separators
   return fullyQualifiedName.data() + tblNameOffset;
 }

 // Update the persistent sample table and determine its new cardinality.
 //   1) Delete rows in the persistent sample satisfying the IUS predicate.
 //   2) Insert the rows from <sampleTblName>_I into the persistent sample
 //      if the _I table was created. If building histograms from scratch
 //      using the persistent sample rather than incrementally changing them
 //      (USTAT_INCREMENTAL_UPDATE_STATISTICS is DF_SAMPLE), insert sampled
 //      rows satisfying the IUS where clause directly from the source table
 //      In either case, these rows constitute a random sample of rows from
 //      the source table that satisfy the IUS predicate.
 //   3) From the prior cardinality of the sample table (oldSampleSize), subtract
 //      the number of rows deleted, add the number of rows inserted, and return
 //      the result in the newSampleSize parameter.
 //
 // This can't be done as part of end_IUS_work(), because that is called even
 // when the IUS fails; its purpose is just to modify the SB_PERSISTENT_SAMPLES
 // table to indicate that IUS is no longer in progress on the source table.
 // The persistent sample table itself is only modified if IUS is successful.
 Lng32 HSGlobalsClass::UpdateIUSPersistentSampleTable(Int64 oldSampleSize,
                                                      Int64 requestedSampleSize,
                                                      Int64& newSampleSize)
 {
   Lng32 retcode = 0;
   Int64 rowsAffected;
   HSLogMan *LM = HSLogMan::Instance();
   newSampleSize = oldSampleSize;  // before deleting/adding rows

   HSFuncExecQuery("CONTROL QUERY DEFAULT ALLOW_DML_ON_NONAUDITED_TABLE 'ON'");

   HSHandleError(retcode);

   // step 1  - delete the affected rows from PS
   NABoolean transactional = (CmpCommon::getDefault(USTAT_DELETE_NO_ROLLBACK) == DF_OFF);
   NAString deleteQuery;
   generateIUSDeleteQuery(*hssample_table, deleteQuery, transactional);

   if (LM->LogNeeded()) {
     LM->Log("query to delete from PS:");
     LM->Log(deleteQuery.data());
     LM->StartTimer("IUS: execute query to delete from PS");
   }

   rowsAffected = 0;

   // The most likely error on the DELETE would be due to a bad WHERE clause.
   // (When CQD USTAT_INCREMENTAL_UPDATE_STATISTICS is set to 'SAMPLE', this is
   // the first place that we attempt to use the user's WHERE clause.)
   if (transactional)
     {
       retcode = HSFuncExecTransactionalQueryWithRetry(deleteQuery, -UERR_IUS_BAD_WHERE_CLAUSE,
                             &rowsAffected,
                             "IUS delete from PS where",
                             NULL, NULL);
     }
   else
     {
       retcode = HSFuncExecQuery(deleteQuery, -UERR_IUS_BAD_WHERE_CLAUSE,
                             &rowsAffected,
                             "IUS delete from PS where",
                             NULL, NULL);
     }

   if (LM->LogNeeded()) {
     LM->StopTimer();
     sprintf(LM->msg, PF64 " rows deleted from persistent sample table.", rowsAffected);
     LM->Log(LM->msg);
   }
   HSHandleError(retcode);
   newSampleSize -= rowsAffected;

   // step 2  - add all rows from _I to PS, or sampled from source table,
   // depending on USTAT_INCREMENTAL_UPDATE_STATISTICS value.
   NAString selectInsertQuery;
   generateIUSSelectInsertQuery(*hssample_table, *user_table, selectInsertQuery);

   if (LM->LogNeeded()) {
     LM->Log("query to insert into PS:");
     LM->Log(selectInsertQuery.data());
     LM->StartTimer("IUS: execute query to insert into PS");
   }

   rowsAffected = 0;
   const char* insSourceTblName = extractTblName(*hssample_table + "_I", objDef);
   NABoolean needEspParReset = setEspParallelism(objDef, insSourceTblName);

   // can't retry this one, as it uses non-transactional upsert using load + random
   // select; a retry might add *another* random sample to a partial sample from
   // the previous attempt
   retcode = HSFuncExecQuery(selectInsertQuery, -UERR_INTERNAL_ERROR,
                             &rowsAffected,
                             "IUS insert into PS (select from _I)",
                             NULL, NULL, 0,
                             // check mdam usage if reading incremental sample directly from source table
                             CmpCommon::getDefault(USTAT_INCREMENTAL_UPDATE_STATISTICS) == DF_SAMPLE);  //checkMdam
   if (LM->LogNeeded()) {
     LM->StopTimer();
     sprintf(LM->msg, PF64 " rows inserted into persistent sample table.", rowsAffected);
     LM->Log(LM->msg);
   }
   if (needEspParReset)
     resetEspParallelism();
   HSHandleError(retcode);
   newSampleSize += rowsAffected;

   // Save sample count values to update row in SB_PERSISTENT_SAMPLES table.
   PST_IUSrequestedSampleRows_ = new(STMTHEAP) Int64;
   *PST_IUSrequestedSampleRows_ = requestedSampleSize;
   PST_IUSactualSampleRows_ = new(STMTHEAP) Int64;
   *PST_IUSactualSampleRows_ = newSampleSize;

   HSFuncExecQuery("CONTROL QUERY DEFAULT ALLOW_DML_ON_NONAUDITED_TABLE reset");

   checkTime("after updating persistent sample table for IUS");
   return retcode;
 }

 // Read in CBFs for groups that are PENDING and delayedRead flag is TRUE
 Int32 HSGlobalsClass::readCBFsIntoMemForIUS(NAString& sampleTableName,
                                             HSColGroupStruct* group
      )
 {
    NAString cbfFilePrefix;
    getCBFFilePrefix(sampleTableName, cbfFilePrefix);

    Lng32 sz;
    Lng32 bufSz = 0;
    char* bufptr = NULL;
    struct stat sts;

    while (group) {

       if ( group->delayedRead && group->state == PENDING ) {

         // Reset to FALSE here to assume some problem reading in CBF.
         // Once the CBF does read in successfully, we set the flag to TRUE below.
         group->delayedRead = FALSE;

         NAString cbfFile(cbfFilePrefix);
         cbfFile.append(group->cbfFileNameSuffix());

         if (stat(cbfFile, &sts) == 0) {
            if ( bufSz < sts.st_size ) {
               NADELETEBASIC(bufptr, STMTHEAP);
               bufSz = sts.st_size;
               bufptr = new (STMTHEAP) char[bufSz];
            }

            Lng32 fd = open(cbfFile.data(), O_RDONLY);
            if ( fd != -1 ) {
               // Use a different buffer point because unpackBuffer() will
               // advance the buffer argument!
               char* buffer = bufptr;
               sz = read(fd, buffer, bufSz);
               if ( sz == sts.st_size ) {
                  group->cbf = new (STMTHEAP)
                         CountingBloomFilterWithKnownSkews(STMTHEAP);
                  group->cbf->unpackBuffer(buffer);
                  group->delayedRead = TRUE;
               }
               close(fd);
            }
         }
       }

       group = group->next;
    }

    return 0;
 }


 int file_select(const struct direct *entry)
 {
    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
      return 0;
    else
      return 1;
 }


 // Compute total disk space in 512kb blocks used by persistent CBFs contained in
 // dir USTAT_IUS_PERSISTENT_CBF_PATH
 UInt32 computeTotalCBFDiskSpaceInBlocks(NAString& cbf_path)
 {
    struct direct **files = NULL;


    Int32 count = scandir(cbf_path.data(), &files, file_select,
                          (int(*)(const dirent**, const dirent**))alphasort);

    UInt32 sum = 0;
    struct stat buf;

    // Sum up the amount of disk spaces used by each cbf file
    for (Int32 i=0; i<count; i++) {

       NAString fullpath(cbf_path);
       fullpath += "/";
       fullpath += files[i]->d_name;

       if ( !stat(fullpath, &buf) ) {
         sum += buf.st_blocks;
       }
    }

    // Variable 'sum' has # of units in 512 KB.
    //
    // Quote from Linux man page on stats():
    //    The st_blocks  field  indicates  the  number  of blocks allocated to
    //    the file, 512-byte units.

    return sum;
 }

 // Compute total disk space in 512k blocks and pass it in totalSpace.
 // Return TRUE when there is no issue found.
 NABoolean getTotalDiskSizeInBlocks(NAString& cbf_path, UInt64& totalSpace)
 {
    struct statvfs fsstats;

    if ( !statvfs(cbf_path.data(), &fsstats) ) {
      totalSpace = (UInt64)(fsstats.f_bsize) * fsstats.f_blocks  / 512;
      return TRUE;
    } else
      return FALSE;
 }

 // Check if one more cbf 'cbf' can be added to the cbf_path dir
 NABoolean hasSpaceTostoreCBF(NAString& cbf_path,
                              CountingBloomFilter* cbf,
                              UInt64 totalAllowedInBlocks
                             )
 {
    UInt32 totalOnDisk = computeTotalCBFDiskSpaceInBlocks(cbf_path);

    if ( totalOnDisk + cbf->getTotalMemSize() / 512 <= totalAllowedInBlocks )
      return TRUE;
    else
      return FALSE;
 }


 // Write to disk for CBFs for groups that are PROCESSED and cbf ptr is not NULL
 Int32 HSGlobalsClass::writeCBFstoDiskForIUS(NAString& sampleTableName,
                                             HSColGroupStruct* group
      )
 {
    HSLogMan *LM = HSLogMan::Instance();
    if (LM->LogNeeded())
      LM->StartTimer("IUS: write CBF files to disk");

    NAString path =
      ActiveSchemaDB()->getDefaults().getValue(USTAT_IUS_PERSISTENT_CBF_PATH);

    ULng32 totalCBFsizeInMB =
      ActiveSchemaDB()->getDefaults().getAsULong(USTAT_IUS_MAX_PERSISTENT_DATA_IN_MB);

    float percentage = (float)
      ActiveSchemaDB()->getDefaults().getAsDouble(USTAT_IUS_MAX_PERSISTENT_DATA_IN_PERCENTAGE);

    UInt64 totalSpaceInBlocks = 0;

    if ( !getTotalDiskSizeInBlocks(path, totalSpaceInBlocks) ) {
      if (LM->LogNeeded())
        LM->StopTimer();
      return 0;
    }

    UInt64 totalAllowedInBlocks = MINOF(totalCBFsizeInMB * 1024 / 2,
                                        totalSpaceInBlocks * percentage);

    NAString cbfFilePrefix;
    getCBFFilePrefix(sampleTableName, cbfFilePrefix);

    Lng32 sz;
    Lng32 bufSz = 0;
    char* bufptr = NULL;

    Int32 count = 0;

    while (group) {

       if ( group->cbf && group->state == PROCESSED ) {

         NAString cbfFile(cbfFilePrefix);
         cbfFile.append(group->cbfFileNameSuffix());

         Lng32 cbfSz = group->cbf->getTotalMemSize();

         if ( !hasSpaceTostoreCBF(path, group->cbf, totalAllowedInBlocks) ) {
            group=group->next;
            continue;
         }

         if ( bufSz < cbfSz ) {

            if ( bufptr )
               NADELETEBASIC(bufptr, STMTHEAP);

            bufSz = cbfSz + 100;
            bufptr = new (STMTHEAP) char[bufSz];
         }

         Lng32 fd = open(cbfFile.data(), O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU);
         if ( fd != -1 ) {

            char* buffer = bufptr;
            sz = group->cbf->packIntoBuffer(buffer, FALSE /* no bytes swapping */ );
            HS_ASSERT( sz <= bufSz);
            HS_ASSERT( sz <= buffer - bufptr);

            ssize_t wsz = write(fd, bufptr, sz);

            if ( wsz != sz ) {
               // TBD. Need to remove the file written (if exist)
            } else
               count++;

            close(fd);

         }

         // Make sure we don't write it again on next batch.
         delete group->cbf;
         group->cbf = NULL;
       }

       group = group->next;
    }
    NADELETEBASIC(bufptr, STMTHEAP);

    if (LM->LogNeeded())
      LM->StopTimer();
    return count;
 }

 Int32 HSGlobalsClass::deletePersistentCBFsForIUS(NAString& sampleTableName,
                                                  HSColGroupStruct* group,
                                                  SortState stateToDelete)
 {
    NAString cbfFilePrefix;
    getCBFFilePrefix(sampleTableName, cbfFilePrefix);

    while (group) {

       if ( group->cbf && group->state == stateToDelete ) {

         NAString cbfFile(cbfFilePrefix);
         cbfFile.append(group->cbfFileNameSuffix());

         remove(cbfFile.data());

         // Make sure this unused CBF does not get persisted.
         delete group->cbf;
         group->cbf = NULL;
       }

       group = group->next;
    }

    return 0;
 }

 Lng32 HSGlobalsClass::selectIUSBatch(Int64 currentRows, Int64 futureRows, NABoolean& ranOut, Int32& colsSelected)
 {
   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
     LM->StartTimer("IUS: selectIUSBatch()");

   colsSelected = 0;
   Int32 colsSuggested = 0;  // number of cols we try to allocate
   iusSampleDeletedInMem->depopulate();
   iusSampleInsertedInMem->depopulate();
   Int64 memAllowed = getMaxMemory();
   Int64 memLeft = memAllowed;

   ranOut = FALSE;  // set to true if not enough memory for all cols

   Lng32 retcode = 0;
   Int64 tableUID = objDef->getObjectUID();
   char UIDStr[30];
   convertInt64ToAscii(tableUID,UIDStr);
   Lng32 colnum = 0;

   UInt32 histID = 0;
   Int16 intvlCount = 0;
   Int64 totalRowCount = 0;
   Int64 totalUEC = 0;
   Int64 v2;

   // SELECT HISTOGRAM_ID, INTERVAL_COUNT, ROWCOUNT, TOTAL_UEC, V2
   // FROM SB_HISTOGRAMS
   // WHERE TABLE_UID = tableUID AND COLCOUNT = 1 AND COLUMN_NUMBER = CAST(? AS INTEGER)

   HSErrorCatcher errorCatcher(retcode, - UERR_INTERNAL_ERROR, "HSGlobalsClass::selectIUSBatch", TRUE);

   NAString query = "SELECT HISTOGRAM_ID, INTERVAL_COUNT, ROWCOUNT, TOTAL_UEC, V2 FROM ";
   query += *hstogram_table;
   query += " WHERE TABLE_UID = ";
   query += UIDStr;
   query += " AND COLCOUNT = 1 AND COLUMN_NUMBER = CAST(? AS INTEGER)"; // single column histograms only

   HSCursor histCursor;
   histCursor.prepareQuery(query.data(), 1, 5); // 1 input parameter, 5 output

   // Memory required by RUS has been estimated in prepareForIUS().
   // Here we need to add the extra amount needed by IUS. Do it for each group below.

   // Visit all unprocessed items looking for ones that fit. No early loop exit if memLeft
   // is 0, because it is very unlikely to hit zero exactly. If trySampleInMemory
   // is set, we ignore whether columns have uec too low to perform with internal
   // sort. If all column samples won't fit in memory, we do it over and take that
   // into account the second time.

   float false_prob = (float)CmpCommon::getDefaultNumeric(USTAT_INCREMENTAL_FALSE_PROBABILITY);


   if (LM->LogNeeded()) {
     LM->Log("In ::selectIUSBatch()");
   }

   HSColGroupStruct *group = singleGroup;
   HSColGroupStruct* delGroup = iusSampleDeletedInMem->getColumns();
   HSColGroupStruct* insGroup = iusSampleInsertedInMem->getColumns();

   while (group != NULL)
     {
       if ( group->state == NO_STATS || group->state != UNPROCESSED ||
            group->memNeeded == 0  // was set to 0 if exceeds address space
          )
       {
          if (group->memNeeded == 0)
            ranOut = TRUE;
          group = group->next;
          delGroup = delGroup->next;
          insGroup = insGroup->next;
          continue;
       }

       colnum = group->colSet[0].colnum;
       retcode = histCursor.open(1, (void*)&colnum);
       HSHandleError(retcode);
       retcode = histCursor.fetch(5, (void*)&histID, (void*)&intvlCount,
                                     (void*)&totalRowCount, (void*)&totalUEC,
                                     (void*)&v2);

        if ( retcode == 0 ) {

          retcode = histCursor.close();

          group->avgVarCharSize = (double)v2;
          Lng32 maxHashsToUse =
           (ActiveSchemaDB()->getDefaults()).
                getAsLong(USTAT_IUS_MAX_NUM_HASH_FUNCS);

          group->groupHist = new(STMTHEAP) HSHistogram(intvlCount,
                                                         totalRowCount, // Not needed here
                                                         0, 0,
                                                         TRUE, FALSE);
          // Interval count passed to ctor is max # intervals; need to call the
          // following to set actual number of intervals used.
          group->groupHist->setCurrentInt(intvlCount);
          retcode = initIUSIntervals(group, delGroup, insGroup, histID, intvlCount);
          HSHandleError(retcode);

          // group->groupHist->maxStddev_ now contains the max of the stddev of freq
          // at each interval.

          double maxStddev = group->groupHist->getMaxStddev();

          //group->groupHist->logAll(title);


          UInt64 memForCBF = CountingBloomFilterWithKnownSkews::estimateMemoryInBytes(

                         maxHashsToUse,

                         // The expected number of distinct keys
                         // @WARN: Have to do a narrowing cast here, ctor only takes a UInt32.

                         // Worse case assumption is that all rows in
                         // the old and new sample are distinct.  We take the sum of that
                         // minus the size of Di. Subject the result to the MAX of total UEC.
                         MAXOF((UInt32)(currentRows -
                                        iusSampleDeletedInMem->getNumRows()+
                                        iusSampleInsertedInMem->getNumRows()),
                                       (UInt32)totalUEC),

                         // probability of false positives, from CQD
                         false_prob,

                         // take the 3x max stddev among intervals
                         UInt32(totalRowCount / totalUEC) + 3*UInt32(ceil(maxStddev)),

                         // expected # of keys with high frequency. +1 to include the
                         // dummy interval.
                         (UInt32)((intvlCount+1)  * 2),

                         intvlCount+1
                       );


          Int64 totMemNeeded = Int64(group->memNeeded
                                + delGroup->memNeeded
                                + insGroup->memNeeded
                                + memForCBF);

          if ( totMemNeeded < memLeft )
          {
            group->state = PENDING;
            delGroup->state = PENDING;
            insGroup->state = PENDING;


            colsSuggested++;
            memLeft -= totMemNeeded;

          } else {
            // Not enough memory for the group. Leave the group in UNPROCESSED state,
            // and get rid of the HSHistogram object we created for it.
            ranOut = TRUE;
            delete group->groupHist;
            group->groupHist = NULL;
            if (LM->LogNeeded()) {
               sprintf(LM->msg, "Not enough memory for %s: memLeft=" PF64 " totMemNeeded=", group->colNames->data(), memLeft);
               formatFixedNumeric((Int64)totMemNeeded, 0, LM->msg+strlen(LM->msg));
               LM->Log(LM->msg);
               sprintf(LM->msg, "group->memNeeded=" PF64"", group->memNeeded);
               LM->Log(LM->msg);
               sprintf(LM->msg, "delGroup->memNeeded=" PF64"", delGroup->memNeeded);
               LM->Log(LM->msg);
               sprintf(LM->msg, "insGroup->memNeeded=" PF64"", insGroup->memNeeded);
               LM->Log(LM->msg);
               sprintf(LM->msg, "memForCBF=");
               formatFixedNumeric((Int64)memForCBF, 0, LM->msg+strlen(LM->msg));
               LM->Log(LM->msg);
            }
          }

       } else if ( retcode == 100 ) {
           // Ignore the group if there is no stats for it!

           if (LM->LogNeeded()) {
               sprintf(LM->msg, "No stats: histTableName=%s, tableUid=" PF64", colnum=%d",
                                (char*)hstogram_table->data(),
                                tableUID,
                                group->colSet[0].colnum);
               LM->Log(LM->msg);
           }

           retcode = histCursor.close();
           HSHandleError(retcode);

           diagsArea << DgSqlCode(UERR_IUS_NO_EXISTING_STATS)
                     << DgString0(group->colSet[0].colname->data());

           // No stats exist for the group. Change the process state
           // to NO_STATS so that we can skip it again in next call
           // to this routine and accurately report missing stats for each
           // column once.
           group->state = NO_STATS;

        } else {
           histCursor.close();
           HSHandleError(retcode);
        }

        group = group->next;
        delGroup = delGroup->next;
        insGroup = insGroup->next;
     }

    // Now allocate memory for singleGroup, inMemDelete and inMemInsert table,
    // for each column in PENDING state.
    colsSelected = allocateMemoryForIUSColumns(singleGroup, futureRows,
                                iusSampleDeletedInMem->getColumns(),
                                iusSampleDeletedInMem->getNumRows(),
                                iusSampleInsertedInMem->getColumns(),
                                iusSampleInsertedInMem->getNumRows());

    if (LM->LogNeeded())
     {
       LM->Log("Columns selected by selectIUSBatch():");
       group = singleGroup;
       while (group != NULL)
         {
           if (group->state == PENDING)
             {
               sprintf(LM->msg, "    %s (" PF64" bytes)",
                                group->colSet[0].colname->data(),
                                group->memNeeded);
               LM->Log(LM->msg);
             }
           group = group->next;
         }
      sprintf(LM->msg, "return from selectIUSBatch(): columns originally selected = %d, "
                       "columns able to allocate = %d", colsSuggested, colsSelected);
      LM->Log(LM->msg);
      LM->StopTimer();
     }

   return retcode;
 }

 // Use In-memory tables to update histograms incrementally.
 Lng32 HSGlobalsClass::incrementHistograms()
 {
   HSLogMan *LM = HSLogMan::Instance();

   Lng32 retcode = 0;

   HSColGroupStruct* group = singleGroup;
   HSColGroupStruct* delGroup = iusSampleDeletedInMem->getColumns();
   HSColGroupStruct* insGroup = iusSampleInsertedInMem->getColumns();

   while (group)
     {
        if ( group->state == PENDING ) {

          retcode = processIUSColumn(group, delGroup, insGroup);

          if ( retcode > 0 ) {
            // IUS is not successful. Keep the group
            // in PENDING state so that it can be dealt with in the internal sort code path.

            retcode = 0;
          } else {
            if ( retcode == 0 ) {
               group->state = PROCESSED; // IUS successful.
               group->freeISMemory();
               delGroup->state = PROCESSED; // IUS successful.
               delGroup->freeISMemory();
               insGroup->state = PROCESSED; // IUS successful.
               insGroup->freeISMemory();
            } else
               HSHandleError(retcode);
          }
       }

       group = group->next;
       delGroup = delGroup->next;
       insGroup = insGroup->next;
     }

   return retcode;
 }

 Int32 HSGlobalsClass::processIUSColumn(HSColGroupStruct* smplGroup,
                                        HSColGroupStruct* delGroup,
                                        HSColGroupStruct* insGroup)
 {
   Int32 retcode = 0;
   Lng32 datatype = smplGroup->ISdatatype;

   // Only need to handle types used for IS/IUS. Datetime/interval types and
   // non-integral fixed numerics are all converted to one of these types.
   switch (datatype)
     {
       case REC_BIN8_SIGNED:
         return processIUSColumn((Int8*)smplGroup->data, L"%hd", smplGroup, delGroup, insGroup);
         break;
       case REC_BOOLEAN:
       case REC_BIN8_UNSIGNED:
         return processIUSColumn((UInt8*)smplGroup->data, L"%hu", smplGroup, delGroup, insGroup);
         break;
       case REC_BIN16_SIGNED:
         return processIUSColumn((Int16*)smplGroup->data, L"%hd", smplGroup, delGroup, insGroup);
         break;
       case REC_BPINT_UNSIGNED:
       case REC_BIN16_UNSIGNED:
         return processIUSColumn((UInt16*)smplGroup->data, L"%hu", smplGroup, delGroup, insGroup);
         break;
       case REC_BIN32_SIGNED:
         return processIUSColumn((Int32*)smplGroup->data, L"%d", smplGroup, delGroup, insGroup);
         break;
       case REC_BIN32_UNSIGNED:
         return processIUSColumn((UInt32*)smplGroup->data, L"%u", smplGroup, delGroup, insGroup);
         break;
       case REC_BIN64_SIGNED:
         return processIUSColumn((Int64*)smplGroup->data, L"%lld", smplGroup, delGroup, insGroup);
         break;
       case REC_BIN64_UNSIGNED:
         return processIUSColumn((UInt64*)smplGroup->data, L"%llu", smplGroup, delGroup, insGroup);
         break;
       case REC_FLOAT32:
         return processIUSColumn((Float32*)smplGroup->data, L"%f", smplGroup, delGroup, insGroup);
         break;
       case REC_FLOAT64:
         return processIUSColumn((Float64*)smplGroup->data, L"%lf", smplGroup, delGroup, insGroup);
         break;
       case REC_BYTE_F_ASCII:
       case REC_BYTE_F_DOUBLE:
         {
           // Create an object to be used with the value iterator; does not own its content.
           // In setting length, take into account that length in IUSFixedChar is in
           // characters instead of bytes.
           IUSFixedChar fixedChar(FALSE);
           if (DFS2REC::isDoubleCharacter(smplGroup->ISdatatype))
             IUSFixedChar::setLength(smplGroup->ISlength / 2);
           else
             IUSFixedChar::setLength(smplGroup->ISlength);
           IUSFixedChar::setCaseInsensitive(smplGroup->colSet[0].caseInsensitive == 1);
           IUSFixedChar::setColCollation(smplGroup->colSet[0].colCollation);
           IUSFixedChar::setCharSet(smplGroup->colSet[0].charset);
           return processIUSColumn(&fixedChar, L"", smplGroup, delGroup, insGroup);
         }
         break;
       case REC_BYTE_V_ASCII:
       case REC_BYTE_V_DOUBLE:
         {
           // Create an object to be used with the value iterator; does not own its content.
           // In setting length, take into account that length in IUSFixedChar is in
           // characters instead of bytes.
           IUSVarChar varChar(FALSE);
           if (DFS2REC::isDoubleCharacter(smplGroup->ISdatatype))
             IUSVarChar::setDeclaredLength(smplGroup->ISlength / 2);
           else
             IUSVarChar::setDeclaredLength(smplGroup->ISlength);
           IUSVarChar::setCaseInsensitive(smplGroup->colSet[0].caseInsensitive == 1);
           IUSVarChar::setColCollation(smplGroup->colSet[0].colCollation);
           IUSVarChar::setCharSet(smplGroup->colSet[0].charset);
           return processIUSColumn(&varChar, L"", smplGroup, delGroup, insGroup);
         }
         break;

       default:
         retcode = -1;
         HSHandleError(retcode);
     } // switch

   return retcode;
 }

 Lng32 HSGlobalsClass::initIUSIntervals(HSColGroupStruct* smplGroup,
                                        HSColGroupStruct* delGroup,
                                        HSColGroupStruct* insGroup,
                                        UInt32 histID,
                                        Int16 numIntervals)
 {
   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
     LM->StartTimer("IUS: initIUSIntervals()");
   typedef Int16 LenType;
   Lng32 retcode = 0;
   Int64 tableUID = objDef->getObjectUID();
   char UIDStr[30];
   convertInt64ToAscii(tableUID,UIDStr);

   Int64 rowCount;
   Int16 intvlNum;
   Int64 uec;
   double max_stddev = 0.0; // max stddev per histogram
   double stddev; // stddev per interval
   Int64 v1,v2;   // read but not used
   NAWchar boundarySpec[HS_MAX_UCS_BOUNDARY_CHAR + 1];  // +1 for 2-byte count
   NAWchar MFV[HS_MAX_UCS_BOUNDARY_CHAR + 1];

   char histIDStr[20];
   sprintf(histIDStr,"%u",histID);

   // SELECT several columns
   // FROM SB_HISTOGRAM_INTERVALS
   // WHERE TABLE_UID = tableUID AND HISTOGRAM_ID = histID
   // ORDER BY INTERVAL_NUMBER

   NAString query = "SELECT INTERVAL_NUMBER, INTERVAL_ROWCOUNT, INTERVAL_UEC,"
                    " INTERVAL_BOUNDARY, STD_DEV_OF_FREQ, V1, V2, V5 FROM ";
   query += *hsintval_table;
   query += " WHERE TABLE_UID = ";
   query += UIDStr;
   query += " AND HISTOGRAM_ID = ";
   query += histIDStr;
   query += " ORDER BY INTERVAL_NUMBER";

   HSCursor intvlCursor(STMTHEAP,HS_INTERVAL_STMT_ID);
   intvlCursor.prepareQuery(query.data(), 0, 8); // no input parameters, 8 output

   retcode = intvlCursor.open();
   HSHandleError(retcode);

   HSHistogram* hist = smplGroup->groupHist;
   Int16 fetchCount = 0;

   while (TRUE)
     {
       retcode = intvlCursor.fetch(8, (void*)&intvlNum,
                                      (void*)&rowCount,
                                      (void*)&uec,
                                      (void*)boundarySpec,
                                      (void*)&stddev, (void*)&v1, (void*)&v2,
                                      (void*)MFV);
       if (retcode == HS_EOF)
         {
           retcode = 0;
           break;
         }
       else
         if ( retcode != 0 ) {
         }
       HSHandleError(retcode);

       if ( stddev > max_stddev )
         max_stddev = stddev;

       fetchCount++;

       hist->setIntRowCount(intvlNum, rowCount);
       hist->setIntUec(intvlNum, uec);

       // The boundary and MFV values stored in the histogram_intervals table
       // include closing parens that we need to remove in the in-memory version.
       hist->setIntBoundary(intvlNum,
                            ((char*)boundarySpec) + sizeof(LenType) + sizeof(NAWchar),
                            *(LenType*)boundarySpec - (2 * sizeof(NAWchar)));
       hist->setIntMFVValue(intvlNum,
                            ((char*)MFV) + sizeof(LenType) + sizeof(NAWchar),
                            *(LenType*)MFV - (2 * sizeof(NAWchar)));

       hist->setIntMFVRowCount(intvlNum, v1);
       hist->setIntMFV2RowCount(intvlNum, v2);
     }

   retcode = intvlCursor.close();
   HSHandleError(retcode);

   hist->setMaxStddev(max_stddev);

   // 0th interval used only to hold lowest value and is not included in count of
   // intervals, so we should have fetched 1 more than numIntervals.
   HS_ASSERT(fetchCount == numIntervals+1);

   // Interval to count nulls, if present, is last one. The last interval is at
   // index numIntervals even though the array is 0-based, because the 0th interval
   // is not included in the count stored in the Histograms table.
   if (!na_wcscmp((const NAWchar*)hist->getIntBoundary(numIntervals).data(), L"NULL"))
     hist->setHasNull(TRUE);
   else
     {
       // Histogram for this sample table column has no null interval; should not
       // be any nulls for the column in rows deleted from sample. If there are
       // nulls for the column in rows to be added to the sample, we must create
       // a null interval in the histogram.

       HS_ASSERT(delGroup->nullCount == 0);
       if (insGroup->nullCount > 0)
         // Nulls will be included in this histogram where none were before.
         hist->addNullInterval(insGroup->nullCount, insGroup->colCount);  //@ZX -- may do more than we want

     }

   if (LM->LogNeeded())
     LM->StopTimer();
   return retcode;
 }

 /*************************************************/
 /* METHOD:  FlushStatistics()                    */
 /* PURPOSE: Final histogram processing stage.    */
 /*          Based on what user requested, it     */
 /*          will finalize the request. An        */
 /*          HSTranController is declared at the  */
 /*          start of this function, which causes */
 /*          the body of the fn to be executed in */
 /*          a transaction.                       */
 /* PARAMS:  statsWritten(out) - tells if stats   */
 /*                              were actually    */
 /*                              written to db.   */
 /* RETCODE:  0 - successful                      */
 /*          -1 - failure                         */
 /*************************************************/
 Lng32 HSGlobalsClass::FlushStatistics(NABoolean &statsWritten)
   {
     HSLogMan *LM = HSLogMan::Instance();
     Lng32 retcode = 0;
     statsWritten = FALSE;

                                           /*=================================*/
                                           /*        CLEAR OPTION USED        */
                                           /*=================================*/
     if (optFlags & CLEAR_OPT)
       {
         // Execute in a transaction.  Transaction will rollback or commit based
         // on retcode, once this block is left.
         HSTranController TC("FLUSH STATISTICS", &retcode);
         HSHandleError(retcode);
         if (groupCount == 0)                 /*== No groups specified      ==*/
           {                                       /* delete all histograms   */
             retcode = ClearAllHistograms();       /* generated for table     */
             HSHandleError(retcode);
           }
         else
           {                                  /*== Group list specified     ==*/
         	LM->StartTimer("MakeAllHistid (for CLEAR)");
             retcode = MakeAllHistid();         /* determine histogram ids    */
                                                /* Performs SERIALIZABLE read */
             LM->StopTimer();
             HSHandleError(retcode);
             retcode = ClearSelectHistograms();   /*delete selected histograms*/
             HSHandleError(retcode);               /* generated for table     */
           }

         //When a SQL/MP table is dropped, the histograms are not automatically
         //deleted. Whenever the CLEAR option is used, we always delete
         //orphan histograms - whether they exist or not. Having orphan
         //histograms does not hurt anything.
         if (tableFormat == SQLMP)
           DeleteOrphanHistograms();
       }
                                           /*=================================*/
                                           /*  COLLECT FILE-LEVEL STATISTICS  */
                                           /*=================================*/
     /* no histograms generated only collect file level statistics */
     else if (groupCount == 0)
       {
          if (CmpCommon::getDefault(USTAT_COLLECT_FILE_STATS) == DF_ON)
          {
            // Execute in a transaction.  Transaction will rollback or commit based
            // on retcode, once this block is left.
            HSTranController TC("FLUSH STATISTICS", &retcode);
            HSHandleError(retcode);
            LM->StartTimer("collectFileStatistics()");
            retcode = objDef->collectFileStatistics();
            LM->StopTimer();
            HSHandleError(retcode);
          }

          // Absence of column groups to update could be due to either
          // not specifying ON clause, or using EXISTING when there are
          // no existing histograms or NECESSARY when there are no obsolete
          // or ungenerated histograms.
          if (optFlags & EXISTING_OPT)
            diagsArea << DgSqlCode(UERR_WARNING_NO_EXISTING_HISTOGRAMS);
          else if (optFlags & NECESSARY_OPT)
            diagsArea << DgSqlCode(UERR_WARNING_NO_OBSOLETE_HISTOGRAMS);
          else
            diagsArea << DgSqlCode(UERR_WARNING_FILE_STATISTICS);
       }
                                           /*=================================*/
                                           /*     UPDATE HISTOGRAM TABLES     */
                                           /*=================================*/
     else
       {
         // Execute in a transaction.
         { // New block to ensure transaction termination via HSTranController dtor.
           HSTranController TC("FLUSH STATISTICS", &retcode);
           HSHandleError(retcode);

           // IUS work: Keep warnings unless there are errors.
           if ( CmpCommon::diags()->getNumber(DgSqlCode::ERROR_) > 0 )
               CmpCommon::diags()->clear();

           LM->StartTimer("MakeAllHistid()");
           retcode = MakeAllHistid();      /* Determine histogram ids       */
           LM->StopTimer();
           HSHandleError(retcode);         /* Performs SERIALIZABLE read    */

           LM->StartTimer("WriteStatistics()");
           retcode = WriteStatistics();    /* Write new histograms using    */
           LM->StopTimer();
           HSHandleError(retcode);         /* precomp queries, then dels    */
                                           /* old hists using dyn query.    */
           statsWritten = TRUE;            /* no error writing hist stats   */
         } // block for flush stats transaction

         // Separate transaction for collection of file statistics.
         if (CmpCommon::getDefault(USTAT_COLLECT_FILE_STATS) == DF_ON)
         { // New block to ensure transaction termination via HSTranController dtor.
           HSTranController TC("GET FILE STATISTICS", &retcode);
           HSHandleError(retcode);
           retcode = objDef->collectFileStatistics(); /* collect file level statistics */
           if (retcode < 0)
             {
               // Update of histograms completed ok, but the collection of file
               // statistics failed, possibly due to contention for the Partitions
               // metadata table. Convert error diagnostics to warnings, and add
               // an explanatory one.
               NegateAllErrors(&diagsArea);
               HSFuncMergeDiags(UERR_WARNING_FILESTATS_FAILED);
             }
         }

       }

     if (retcode == 0)
       {
         // invalidate any cached histograms in the cluster
         CmpSeabaseDDL::invalidateStats(objDef->getObjectUID());
       }

     return retcode;
   }


 /*****************************************************************************/
 /* METHOD:  WriteStatistics()                                                */
 /* PURPOSE: Inserts into the HISTOGRAM table using INSERT101_... precompiled */
 /*          query with unique histogram ids obtained from MakeAllHistIds.    */
 /*          Then inserts into the HISTOGRAM_INTERVAL table using INSERT201.. */
 /*          precompiled query.  Finally, deletes old histograms using        */
 /*          dynamic queries.                                                 */
 /* RETCODE:  0 - successful                                                  */
 /*          -1 - failure                                                     */
 /*****************************************************************************/
 Lng32 HSGlobalsClass::WriteStatistics()
   {
     Lng32     retcode = 0;
     char     tempStr[2000];
     char     uidStr[30];
     char     compileStatsReason=0;
     NAString oldHistList, dupHistList;
     NAString stmt;
     NABoolean processMultiGroups;
     HSColGroupStruct *group;
     HSColumnStruct   *col;
     HSLogMan *LM = HSLogMan::Instance();
     HSErrorCatcher errorCatcher(retcode, - UERR_INTERNAL_ERROR, "FLUSH_STATISTICS", TRUE);
     Lng32 i;

     // Use auto_ptr to ensure that histRS and histintRS are deleted when we exit
     // this function. Clearing of STMTHEAP when the statement ends wouldn't be
     // good enough; the dtors must be executed so the CLI statements will be
     // deallocated. Otherwise, a "statement already in use" error occurs for the
     // next instance of HSinsertHist or HSinsertHistint.
     std::auto_ptr<HSinsertHist> histRS(NULL);
     std::auto_ptr<HSinsertHistint> histintRS(NULL);

     //DEFAULT VALUES FOR HISTOGRAM TABLE.
     //These values should only be used when base table is empty. Otherwise, we
     //should get values from the generated histogram.
     Lng32 numInts    = 1;
     Int64 totalUec  = 0;
     Int64 avgVarCharSize  = 0;
     HSDataBuffer lval(WIDE_("()"));
     HSDataBuffer hval(WIDE_("()"));
     short readCount     = 0;
     //Int64 hv3           = 0;
     //Int64 hv4           = 0;
     //HSDataBuffer hv5(L"");
     //HSDataBuffer hv6(L"");

     //DEFAULT VALUES FOR HISTOGRAM_INTERVALS TABLE.
     //These values should only be used when base table is empty. Otherwise, we
     //should get values from the generated histogram.
     Int64 intRowCount = 0;
     Int64 intUEC      = 0;
     HSDataBuffer bound(WIDE_("()"));
     double stdDevOfFreq = 0;
     Int64 intMFVRowCount   = 0;
     Int64 intMFV2RowCount  = 0;
     HSDataBuffer mostFreqVal(L"()");
     //Int64 v3 = 0;
     //Int64 v4 = 0;
     //HSDataBuffer v6(L"");

     convertInt64ToAscii(objDef->getObjectUID(), uidStr);

     // If these are compile time stats and the desired sample is less than the
     // default would have been, set reason to SMALL_SAMPLE.
     if (requestedByCompiler)
     {
       // These are compile time stats.
       Int64 defaultSampRows;
       if (actualRowCount >= getMinRowCountForSample())
            defaultSampRows = getDefaultSampleSize(actualRowCount);
       else defaultSampRows = actualRowCount;

       // Set reason to small sample if the actual sample table had less then 90% of the
       // rows that a default sample would have had.  That is, record these as full blown
       // stats if the sample rowcount is pretty close to what the default would have had.
       if (sampleRowCount < defaultSampRows * 0.9)
            compileStatsReason = HS_REASON_SMALL_SAMPLE; // sample size is small.
       else compileStatsReason = HS_REASON_AUTO_INIT;    // sample size similar to default.
     }

     if (singleGroup)
       {
         group = singleGroup;                      /* process single-columns  */
         processMultiGroups = TRUE;                /* then multi-columns      */
       }
     else
       {
         group = multiGroup;                       /* process multi-columns   */
         processMultiGroups = FALSE;               /* only once               */
       }

     // histRS and histintRS are instantiations of the std::auto_ptr template;
     // do NOT try to allocate them from STMTHEAP, or a core will occur when
     // they go out of scope and their underlying ptrs are deleted.
     if (tableFormat == SQLMX)
       {
         // histogram versioning
         if (HSGlobalsClass::schemaVersion >= COM_VERS_2300)
         {
 #ifdef NA_USTAT_USE_STATIC  // use static query defined in module file
           histRS.reset(new HSinsertHist("INSERT101_MX_2300",
                                         hstogram_table->data()));
           histintRS.reset(new HSinsertHistint("INSERT201_MX_2300",
                                               hsintval_table->data()));
 #else // NA_USTAT_USE_STATIC not defined, use dynamic query
           histRS.reset(new HSinsertHist(hstogram_table->data()));
           histintRS.reset(new HSinsertHistint(hsintval_table->data()));
 #endif // NA_USTAT_USE_STATIC not defined
         }
         else
         {
           histRS.reset(new HSinsertHist("INSERT101_MX", hstogram_table->data()));
           histintRS.reset(new HSinsertHistint("INSERT201_MX", hsintval_table->data()));
         }

         if (LM->LogNeeded())
            {
              sprintf(LM->msg, "writeStatistics() via INSERT201_MX_2300");
              LM->Log(LM->msg);
            }
       }
     else
       {
         histRS.reset(new HSinsertHist("INSERT101_MP", hstogram_table->data()));
         histintRS.reset(new HSinsertHistint("INSERT201_MP", hsintval_table->data()));
       }
     LM->StartTimer("initialize rowset for Histograms");
     retcode = histRS->initialize();       //initialize ROWSET for HISTOGRAMS
     LM->StopTimer();
     HSHandleError(retcode);
     LM->StartTimer("initialize ROWSET for HISTOGRAM_INTERVALS");
     retcode = histintRS->initialize();    //initialize ROWSET for HISTOGRAM_INTERVALS
     LM->StopTimer();
     HSHandleError(retcode);

     LM->StartTimer("Create new histograms and histogram intervals");
     while (group != NULL)
       {
         if (group->oldHistid != 0)
           {
                                           /*=================================*/
                                           /*   GATHER OLD HISTOGRAM VALUES   */
                                           /*=================================*/
             sprintf(tempStr, " %u,", group->oldHistid);
             oldHistList += tempStr;
             if (group->oldHistidList != "")
               oldHistList += group->oldHistidList;
           }

         //Histograms have been generated. We need to overwrite the default
         //values with actual values from the generated histogram.
         if (actualRowCount != 0 && group->groupHist != NULL)
           {
             numInts     = group->groupHist->getNumIntervals();
             totalUec    = group->groupHist->getTotalUec();

             if (group->colCount == 1) // processing single group.
             {
               if (DFS2REC::isAnyVarChar(group->colSet[0].datatype))
               {
                 // scale up by 100 to get fraction.
                 avgVarCharSize = (Int64) (group->avgVarCharSize * 100.0);
                 // If all values are empty strings, just add 1 byte to
                 // distinguish  from 0 (zero). A zero varchar size means
                 // stats are from R2.4 and early releases.
                 avgVarCharSize = MAXOF(avgVarCharSize, 1);
               }
               else
                 avgVarCharSize = -1; // non-varchar data types.
             }
             retcode = group->groupHist->getLowValue(lval);
             HSHandleError(retcode);
             retcode = group->groupHist->getHighValue(hval);
             HSHandleError(retcode);
           }

         // Set the reason for this histogram to small sample.
         if (compileStatsReason) group->newReason = compileStatsReason;

         for (i = 0; i < group->colCount; i++)
           {
             col = &group->colSet[i];
                                           /*=================================*/
                                           /*     CREATE NEW HISTOGRAMS       */
                                           /*=================================*/

             // Don't add the row if its new histid is 0 - this would only happen
             // if there was a duplicate histogram for a given column. Instead,
             // log it and proceed. The old histid for the duplicate will still be
             // part of the list of histograms to delete, so the duplicate will
             // be eliminated.
             if (group->newHistid)
               retcode = histRS->addRow(objDef->getObjectUID(),
                                       group->newHistid,
                                       col->position,
                                       col->colnum,
                                       group->colCount,
                                       (short)numInts,
                                       actualRowCount,
                                       totalUec,
                                       (char*)statstime->data(),
                                       lval,
                                       hval,
                                       // the following are columns for automation
                                       group->readTime,
                                       readCount,
                                       sampleSeconds,
                                       group->colSecs + columnSeconds,
                                       samplePercentX100,
                                       group->coeffOfVar,
                                       group->newReason,
                                       // the following is used by compile time stats
                                       compileStatsReason ? sampleRowCount : 0,
                                       avgVarCharSize
                                       // v3-v6 currently unused.
                                       );
             else if (LM->LogNeeded())
               {
                 sprintf(LM->msg,
                         "WARNING: Duplicate histogram %d found (will be removed).",
                         group->oldHistid);
                 LM->Log(LM->msg);
               }

             HSHandleError(retcode);
           }

         for (i = 0; i <= numInts; i++)
           {
                                           /*=================================*/
                                           /* CREATE NEW HISTOGRAM_INTERVALS  */
                                           /*=================================*/
             //Histograms have been generated. We need to overwrite the default
             //values with actual values from the generated histogram.
             if (actualRowCount != 0 && group->groupHist != NULL)
               {
                 intRowCount = group->groupHist->getIntRowCount(i);
                 intUEC      = group->groupHist->getIntUec(i);
                 retcode = group->groupHist->getParenthesizedIntBoundary(i, bound);
                 HSHandleError(retcode);
                 intMFVRowCount  = group->groupHist->getIntMFVRowCount(i);
                 intMFV2RowCount = group->groupHist->getIntMFV2RowCount(i);
                 retcode = group->groupHist->getParenthesizedIntMFV(i, mostFreqVal);
                 HSHandleError(retcode);

                // Calculate standard deviation of frequency for this interval.
                if (intUEC > 1) {
                  double intSumSquare = group->groupHist->getIntSquareSum(i);
                  Int64 intOrigUec    = 0;
                  double intAvg    = 0;
                  intOrigUec = group->groupHist->getIntOrigUec(i);
                  if (intOrigUec != 0  AND sampleRowCount != 0 )
                  {
                    Int64 intOrigRc = 0;
                    Int64 upscale_for_rc = actualRowCount / sampleRowCount;
                    intOrigRc = intRowCount / upscale_for_rc;
                    intAvg = (double)intOrigRc / (double)intOrigUec;
                  }
                  else
                  {
                    intAvg      = (double)intRowCount/(double)intUEC;
                    intOrigUec  = intUEC;
                  }
                  double result = (intSumSquare/double(intOrigUec)) -
                                  ((double)intAvg*intAvg);
                  // make sure result is a positive number
                  result = MAXOF(result, 0.0);
                  stdDevOfFreq = sqrt(result);
                }
                else
                  stdDevOfFreq = 0;
             }

             // Don't add intvl with id 0 -- it is from a duplicate histogram
             // that has snuck in somehow (see comment above for histogram call
             // to addRow()).
             if (group->newHistid)
               retcode = histintRS->addRow(objDef->getObjectUID(),
                                           group->newHistid,
                                           (short)i,
                                           intRowCount,
                                           intUEC,
                                           bound,
                                           stdDevOfFreq,
                                           intMFVRowCount,  // v1
                                           intMFV2RowCount, // v2
                                           0, 0,            // v3, v4 unused
                                           mostFreqVal      // v5
                                           // v6 unused
                                           );
             HSHandleError(retcode);
           }

         checkTime("after creating new histograms");
         group = group->next;
         if (group == NULL && processMultiGroups)
           {
             processMultiGroups = FALSE;
             group = multiGroup;
           }
       } // while
     LM->StopTimer();  // Create new histograms and histogram intervals

     LM->StartTimer("Write out new histograms");
     retcode = histRS->flush();
     LM->StopTimer();
     HSHandleError(retcode);

     LM->StartTimer("Write out new histogram intervals");
     retcode = histintRS->flush();
     LM->StopTimer();
     HSHandleError(retcode);
                                           /*=================================*/
                                           /*   REMOVE DUPLICATE HISTOGRAMS   */
                                           /*=================================*/
     LM->StartTimer("Remove duplicate histograms");
     HSColGroupStruct *dup = dupGroup;
     while (dup != NULL)
       {
         sprintf(tempStr, " %u,", dup->oldHistid);
         dupHistList += tempStr;
         dup = dup->next;
       }
     retcode = HSGlobalsClass::removeHists(dupHistList, uidStr, "DELETE DUPLICATE HISTOGRAMS");
     LM->StopTimer();
     LM->StopTimer();  // Remove duplicate histograms
     HSHandleError(retcode);
                                           /*=================================*/
                                           /*      REMOVE OLD HISTOGRAMS      */
                                           /*=================================*/
     LM->StartTimer("Remove old histograms");
     retcode = HSGlobalsClass::removeHists(oldHistList, uidStr, "DELETE OLD HISTOGRAMS");
     LM->StopTimer();
     return retcode;
   }


 /*=================================*/
 /* Remove histograms.              */
 /*=================================*/
 Lng32 HSGlobalsClass::removeHists(NAString &hists, char *uid, const char *operation)
 {
   Lng32 retcode = 0;
   if (hists.length() > 0)
     {

       HSLogMan *LM = HSLogMan::Instance();
       NAString stmt;
       Int64    xRows = 0;
       char     rowCountStr[30];

       LM->StartTimer("Delete old rows from Histograms table");
       hists.remove(hists.length() - 1);      // remove last comma
       stmt = "DELETE FROM ";
       stmt += hstogram_table->data();
       stmt += " WHERE TABLE_UID = ";
       stmt += uid;
       stmt += " AND HISTOGRAM_ID IN (";
       stmt += hists;
       stmt += ")";
       // Note that this can't be done with retry since we are
       // part of a larger transaction started by FlushStatistics
       retcode = HSFuncExecQuery(stmt, -UERR_INTERNAL_ERROR,
                                  &xRows, operation, NULL, NULL);
       LM->StopTimer();
       HSHandleError(retcode);
       if (LM->LogNeeded())
         {
           convertInt64ToAscii(xRows, rowCountStr);
           sprintf(LM->msg, "\t\t\t%s ROWS DELETED", rowCountStr);
           LM->Log(LM->msg);
         }

       LM->StartTimer("Delete old rows from Histogram_Intervals table");
       stmt = "DELETE FROM ";
       stmt += hsintval_table->data();
       stmt += " WHERE TABLE_UID = ";
       stmt += uid;
       stmt += " AND HISTOGRAM_ID IN (";
       stmt += hists;
       stmt += ")";
       // Note that this can't be done with retry since we are
       // part of a larger transaction started by FlushStatistics
       retcode = HSFuncExecQuery(stmt, -UERR_INTERNAL_ERROR,
                                 &xRows, operation, NULL, NULL);
       LM->StopTimer();
       HSHandleError(retcode);
       if (LM->LogNeeded())
         {
           convertInt64ToAscii(xRows, rowCountStr);
           sprintf(LM->msg, "\t\t\t%s ROWS DELETED", rowCountStr);
           LM->Log(LM->msg);
         }
     }
   checkTime("during removal of histograms");
   return retcode;
 }


 // Perform CURSOR103_... to obtain histograms.  Make list of
 // histograms and return as groupList.  For each histogram
 // read, check to see if it is a duplicate.  If so, add to list
 // HSGlobalsClass::dupGroup and do not put on groupList.   The
 // dupGroup will be removed in WriteStatistics (during regular
 // update statistics).
 // arguments
 //   - skipEmpty: is EXISTING keyword specified
 //   - exclusive: do we need exclusive locks on the accessed rows
 Lng32 HSGlobalsClass::groupListFromTable(HSColGroupStruct*& groupList,
                                          NABoolean skipEmpty,
                                          NABoolean exclusive)
   {
     HSLogMan *LM = HSLogMan::Instance();
     LM->StartTimer("Read histograms, return as HSColGroupStruct list (groupListFromTable())");
     HSColGroupStruct *group = NULL, **lastDupGroup = &dupGroup;
     HSColumnStruct   col;
     NAString columnName;
     ULng32 histid;
     Lng32 colPos, colNum, colCount;
     Int64 objID = objDef->getObjectUID();
     Lng32 retcode = 0;
     char readTime[TIMESTAMP_CHAR_LEN+1];
     char reason = HS_REASON_UNKNOWN;

     // Initialize the pointer to the group list we will build.
     groupList = NULL;

     // if showstats for a native hbase table,hive table or table under seabase schema,
     // need to check if the table SB_HISTOGRAMS exist
     NAString schemaName;
     if (strcmp(hstogram_table->data(), "TRAFODION.\"_HBASESTATS_\".SB_HISTOGRAMS") == 0)
       schemaName = "_HBASESTATS_";
     else if (strcmp(hstogram_table->data(), "TRAFODION.\"_HIVESTATS_\".SB_HISTOGRAMS") == 0)
       schemaName = "_HIVESTATS_";
     else if (strcmp(hstogram_table->data(), "TRAFODION.SEABASE.SB_HISTOGRAMS") == 0)
       schemaName = "SEABASE";
     if (!schemaName.isNull())
       {
         NAString queryStr = "SELECT count(*) FROM TRAFODION.\"_MD_\".OBJECTS WHERE SCHEMA_NAME='" +
                             schemaName +
                             "' AND OBJECT_NAME='SB_HISTOGRAMS' AND OBJECT_TYPE='BT';";
         HSCursor cursor;
         retcode = cursor.prepareQuery(queryStr.data(), 0, 1);
         HSHandleError(retcode);
         retcode = cursor.open();
         HSHandleError(retcode);
         ULng32 cnt;
         retcode = cursor.fetch (1, (void *)&cnt);
         HSHandleError(retcode);
         if (cnt == 0)
           {
             LM->StopTimer();
             return 0;
           }
       }

 #ifdef NA_USTAT_USE_STATIC  // use static query defined in module file
     HSCliStatement::statementIndex stmt;
     if (tableFormat == SQLMX)
       if (HSGlobalsClass::schemaVersion >= COM_VERS_2300)
       {

         if (exclusive)
           stmt = HSCliStatement::CURSOR103_MX_2300_X;
         else
           stmt = HSCliStatement::CURSOR103_MX_2300;
       }
       else
         stmt = HSCliStatement::CURSOR103_MX;
     else
       stmt = HSCliStatement::CURSOR103_MP;

     HSCliStatement cursor103( stmt,
                            (char *)hstogram_table->data(),
                            (char *)&objID
                          );
 #else // NA_USTAT_USE_STATIC not defined, use dynamic query
     char sbuf[25];
     NAString qry = "SELECT HISTOGRAM_ID, COL_POSITION, COLUMN_NUMBER, COLCOUNT, "
                           "cast(READ_TIME as char(19) character set iso88591), REASON "
                    "FROM ";
     qry.append(hstogram_table->data());
     qry.append(    " WHERE TABLE_UID = ");
     sprintf(sbuf, PF64, objID);
     qry.append(sbuf);
     qry.append(    " ORDER BY TABLE_UID, HISTOGRAM_ID, COL_POSITION ");
     qry.append(    " FOR READ COMMITTED ACCESS");

     HSCursor cursor103;
     retcode = cursor103.prepareQuery(qry.data(), 0, 6);
     HSHandleError(retcode);
 #endif // NA_USTAT_USE_STATIC not defined

     retcode = cursor103.open();
     HSHandleError(retcode);

     while (retcode == 0)
         {
           if (tableFormat == SQLMX && HSGlobalsClass::schemaVersion >= COM_VERS_2300)
             retcode = cursor103.fetch (6,
                                       (void *)&histid, (void *)&colPos,
                                       (void *)&colNum, (void *)&colCount,
                                       (void *)&readTime, (void *)&reason
                                       );
           else
             retcode = cursor103.fetch (4,
                                       (void *)&histid, (void *)&colPos,
                                       (void *)&colNum, (void *)&colCount
                                       );
           // Don't read any more (break out of loop) if fetch did not succeed.
           HSFilterWarning(retcode);
           if (retcode)
             break;
           // If EXISTING keyword specified and REASON field is EMPTY, skip.
           if (skipEmpty && reason == HS_REASON_EMPTY)
             continue;

           col = objDef->getColInfo(colNum); // colNum is position in table
           col.colnum = colNum;
           col.position = colPos;

           columnName = objDef->getColName(colNum);

           if (groupList == NULL)  // FIRST GROUP ENTRY
             {
               groupList = new(STMTHEAP) HSColGroupStruct;
               group = groupList;
               group->prev = NULL;
             }
           else                      // APPEND GROUP ENTRY
             {
               group->next = new(STMTHEAP) HSColGroupStruct;
               group->next->prev = group;
               group = group->next;
             }
           group->colSet.insert(col);
           group->oldHistid = histid;
           strncpy(group->readTime, readTime, TIMESTAMP_CHAR_LEN);
           group->readTime[TIMESTAMP_CHAR_LEN] = '\0';
           group->reason = reason;
           group->colCount = colCount;
           *group->colNames += ToAnsiIdentifier(columnName);
           *group->colNames += ",";

           for (Int32 i=1; i < colCount; i++)  // GET ALL RELATED ROWS
             {
               if (tableFormat == SQLMX && HSGlobalsClass::schemaVersion >= COM_VERS_2300)
                 retcode = cursor103.fetch (6,
                                           (void *)&histid, (void *)&colPos,
                                           (void *)&colNum, (void *)&colCount,
                                           (void *)&readTime,(void *)&reason
                                           );
               else
                 retcode = cursor103.fetch (4,
                                           (void *)&histid, (void *)&colPos,
                                           (void *)&colNum, (void *)&colCount
                                           );
               HS_ASSERT(retcode == 0);
               HS_ASSERT(group->oldHistid == histid);

               col = objDef->getColInfo(colNum); // colNum is position in table
               col.colnum = colNum;
               col.position = colPos;
               group->colSet.insert(col);
               columnName = objDef->getColName(colNum);

               *group->colNames += " ";
               *group->colNames += ToAnsiIdentifier(columnName);
               *group->colNames += ",";
             }
           group->colNames->remove(group->colNames->length() - 1); // remove last comma

           if (findDuplicate(group, groupList))
           {
              // Add to HSGlobalsClass removal list.
              *lastDupGroup = group;
              lastDupGroup = &(group->next);
              *lastDupGroup = NULL; // NULL end of dup list.

              group=group->prev; // remove this group.
              group->next = NULL;
           }
         }

 // For the dynamic case, the HSCursor dtor closes the cursor when cursor103
 // goes out of scope.
 #ifdef NA_USTAT_USE_STATIC
     // Don't overwrite the return code if an error has occurred, but attempt to
     // close the cursor anyway (in case it was successfully opened).
     if (retcode < 0)
       cursor103.close();
     else
       retcode = cursor103.close();
 #else
     // Caller may use HSHandleError, which bails out for nonzero retcode.
     if (retcode == 100)
       retcode = 0;
 #endif

     LM->StopTimer();
     return retcode;
   }

 // Determine whether entry is a duplicate within list.  The first occurrence
 // of any duplicated entry will always return FALSE.  All others will return
 // TRUE.
 NABoolean HSGlobalsClass::findDuplicate(const HSColGroupStruct *entry,
                                               HSColGroupStruct *list)
   {
     NABoolean retval = FALSE;
     if (!entry || !list) return retval;

     // Loop through all items that are before entry on the list.  This means
     // that the first of any duplicate entries will always return FALSE,
     // leaving it to be used normally.  This also reduces the amount of
     // comparisons required.
     HSColGroupStruct *listItem = list;
     while (listItem != NULL && listItem != entry)
       {
         if (entry->colSet == listItem->colSet)
           {
             retval = TRUE;
             break;
           }
         listItem = listItem->next;
       }
     return retval;
   }

 HSColGroupStruct* HSGlobalsClass::findGroup(const HSColGroupStruct *tableGroup)
   {
     HS_ASSERT(tableGroup != NULL);
     HSColGroupStruct *group = NULL;
     NABoolean processMultiGroups;

     if (singleGroup)
       {
         group = singleGroup;                      /* process single-columns  */
         processMultiGroups = TRUE;                /* then multi-columns      */
       }
     else
       {
         group = multiGroup;                       /* process multi-columns   */
         processMultiGroups = FALSE;               /* only once               */
       }

     // Find histogram from HISTOGRAMS table in list of histograms specified
     // at command line.  Return NULL if a histogram in HISTOGRAMS table is
     // not specified for regeneration.
     while (group != NULL)
       {
         if (group->colSet == tableGroup->colSet)
           break;

         group = group->next;
         if (group == NULL && processMultiGroups)
           {
             processMultiGroups = FALSE;
             group = multiGroup;
           }
       }

     return group;
   }


 HSColGroupStruct* HSGlobalsClass::findGroup(const Lng32 colnum)
   {
     HSColGroupStruct *group = singleGroup;

     while (group != NULL)
       {
         if (colnum == group->colSet[0].colnum)
           break;

         group = group->next;
       }

     return group;
   }

 //  return the group with the given column number also return the position of
 //  the group in the group list
 HSColGroupStruct* HSGlobalsClass::findGroupAndPos(const Lng32 colnum, Int32 &pos)
   {
     HSColGroupStruct *group = singleGroup;

     pos = 0;
     while (group != NULL)
       {
         if (colnum == group->colSet[0].colnum)
           break;

         pos++;
         group = group->next;
       }

     return group;
   }

 // has all MC groups be processed and computed
 // forIS is used to indicated that the method was called from
 // the MC IS processing code
 NABoolean HSGlobalsClass::allMCGroupsProcessed(NABoolean forIS)
 {
     HSColGroupStruct *mgroup = multiGroup;

     while (mgroup != NULL)
     {
        if ((forIS && (mgroup->state == UNPROCESSED)) ||
           (!forIS && (mgroup->state != PROCESSED)))
        {
           return FALSE;
        }

        mgroup = mgroup->next;

     }

     return TRUE;
 }


 // This function is called by the HS_ASSERT macro to take care of some things
 // before triggering an assertion failure:
 //   - Log the assertion failure if logging is enabled.
 //   - Roll back transaction if one is in progress.
 //   - Put an assertion error in the diagnostics area. This is supposed to be
 //     done by code executed due to the macro HS_ASSERT invokes for the assertion
 //     failure, but it does not always work properly. Doing it here prevents it
 //     from being attempted downstream.
 // The parameters are the text of the assertion, and the file and line at which
 // it occurred.
 void HSGlobalsClass::preAssertionFailure(const char* condition,
                                          const char* fileName,
                                          Lng32 lineNum)
 {
   HSTranMan *TM = HSTranMan::Instance();
   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
     {
       sprintf(LM->msg, "***[ERROR] INTERNAL ASSERTION (%s) AT %s:%i", condition, fileName, lineNum);
       LM->Log(LM->msg);
     }
   if (TM->StartedTransaction())
     TM->Rollback();
   diagsArea << DgSqlCode(arkcmpErrorAssert)
             << DgString0(condition)
             << DgString1(fileName)
             << DgInt0(lineNum);
 }


 /****************************************************************/
 /* METHOD:  getRetcodeFromDiags()                               */
 /* PURPOSE: Derive the appropriate return code from the contents*/
 /*          of the diagnostics area.                            */
 /* PARAMS:  none                                                */
 /* RETURN:  The return code implied by the errors/warnings in   */
 /*          the diagnostics area.                               */
 /****************************************************************/
 Lng32 HSGlobalsClass::getRetcodeFromDiags()
 {
   // Quick check for empty diagnostics area.
   if (diagsArea.getNumber() == 0)
     return 0;

   Lng32 i;
   Lng32 retcode = 0;
   for (i=1; i<=diagsArea.getNumber(DgSqlCode::ERROR_); i++)
     {
       retcode = diagsArea.getErrorEntry(i)->getSQLCODE();
       HSFilterError(retcode);
       // If we find an error that the filter did not change to -1, return it
       // now (prefer it to one mapped to the generic -1 value).
       if (retcode < -1)
         return retcode;
     }

   if (retcode < 0)
     return retcode;

   // No errors if we get this far. Look at the warnings, and return the first
   // one that HSFilterWarning() does not tell us to ignore.
   for (i=1; i<=diagsArea.getNumber(DgSqlCode::WARNING_); i++)
     {
       retcode = diagsArea.getWarningEntry(i)->getSQLCODE();
       HSFilterWarning(retcode);
       if (retcode)  // if filter didn't replace it with 0
         return retcode;
     }

    HS_ASSERT(!retcode);
    return retcode;
 }


 /****************************************************************/
 /* METHOD:  addGroup()                                          */
 /* PURPOSE: Add the passed group to either the single-column    */
 /*          group or the multicolumn group, depending on its    */
 /*          column count.                                       */
 /* PARAMS:  group(in) -- Pointer to the group to add to one of  */
 /*                       the lists.                             */
 /* ASSUMPTIONS: The group is a detached node and not part of a  */
 /*              list.                                           */
 /****************************************************************/
 void HSGlobalsClass::addGroup(HSColGroupStruct *group)
   {
     HS_ASSERT(group->next == NULL && group->prev == NULL);

     // Link the group in at the front of the appropriate list.
     if (group->colCount == 1)
       {
         if (singleGroup == NULL)   // first group entry
             singleGroup = group;
         else                       // append to front of list
           {
             group->next = singleGroup;
             singleGroup->prev = group;
             singleGroup = group;
           }
         singleGroupCount++;    // count of single-col groups
       }
     else
       {
         if (multiGroup == NULL)    // first group entry
           multiGroup = group;      // no separate count for MC groups
         else                       // append to front of list
           {
             group->next = multiGroup;
             multiGroup->prev = group;
             multiGroup = group;
           }
       }

     groupCount++;                  // overall group count
   }

 // Remove the passed group from the appropriate group list, and deallocate it.
 void HSGlobalsClass::removeGroup(HSColGroupStruct* groupToRemove)
 {
   if (!groupToRemove)
     return;

   HSColGroupStruct* group = (groupToRemove->colCount == 1 ? singleGroup : multiGroup);
   while (group && group != groupToRemove)
     group = group->next;

   // If the group was found, unlink it from list and deallocate.
   if (group)
     {
       if (group->next)
         group->next->prev = group->prev;

       if (group->prev)
         group->prev->next = group->next;
       else if (group->colCount == 1)
         {
           HS_ASSERT(singleGroup == group);
           singleGroup = group->next;
         }
       else
         {
           HS_ASSERT(multiGroup == group);
           multiGroup = group->next;
         }

       // Make group isolated from list, or deleting it will cause further
       // deletions.
       group->next = group->prev = NULL;

       // Group has been detached from list, now delete it.
       delete group;
     }
 }

 /****************************************************************************/
 /* METHOD:  removeGroups()                                                  */
 /* PURPOSE: Remove groups from the front (most recently added) of both the  */
 /*          single- and multi-group lists until arriving at the passed stop */
 /*          nodes. This rolls back the state of the lists to a point before */
 /*          a set of new nodes were added.                                  */
 /* PARAMS:  numGroupsToRemove -- Number of groups to remove. This is not    */
 /*                  strictly necessary, but provides a correctness check.   */
 /*          oldSingle -- Node of singleGroup at which to stop. This will be */
 /*                       the new head of the list of single-column groups.  */
 /*          oldMulti -- Node of multiGroup at which to stop. This will be   */
 /*                      the new head of the list of multiple-column groups. */
 /*                      the lists.                                          */
 /* RETURN:  TRUE if successful, FALSE otherwise                             */
 /****************************************************************************/
 NABoolean HSGlobalsClass::removeGroups(Lng32 numGroupsToRemove,
                                        HSColGroupStruct* oldSingle,
                                        HSColGroupStruct* oldMulti)
   {
     HSLogMan *LM = HSLogMan::Instance();
     HSColGroupStruct* groupToRemove;

     // Remove up to numGroupsToRemove nodes from front of single-group list
     // until stop node is encountered.
     while (singleGroup && singleGroup != oldSingle && numGroupsToRemove > 0)
       {
         groupToRemove = singleGroup;
         singleGroup = singleGroup->next;
         if (singleGroup)
           singleGroup->prev = NULL;
         groupToRemove->next = NULL;  // dtor will try to delete next
         delete groupToRemove;
         numGroupsToRemove--;
         groupCount--;
         singleGroupCount--;
       }

     // Remove nodes from front of multi-group list.
     while (multiGroup && multiGroup != oldMulti && numGroupsToRemove > 0)
       {
         groupToRemove = multiGroup;
         multiGroup = multiGroup->next;
         if (multiGroup)
           multiGroup->prev = NULL;
         groupToRemove->next = NULL;  // dtor will try to delete next
         delete groupToRemove;
         numGroupsToRemove--;
         groupCount--;
       }

     // We should have counted down to 0 groups to remove, and the new list heads
     // should match the targets that were passed in. If either of these conditions
     // is not met, return FALSE.
     if (numGroupsToRemove > 0)
       {
         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "removeGroups() failed: %d groups not removed",
                              numGroupsToRemove);
             LM->Log(LM->msg);
           }
         return FALSE;
       }
     else if (oldSingle != singleGroup || oldMulti != multiGroup)
       {
         if (LM->LogNeeded())
           LM->Log("removeGroups() failed: not enough groups deleted");
         return FALSE;
       }

     return TRUE;
   }

 static Int32 recDepth = 0;
 static Int32 maxRecDepth = 0;


 /************************************************/
 /* METHOD:   updateMCMFV                        */
 /* PURPOSE:  we have a new distinct value       */
 /*           update the uec and MFV values      */
 /* PARAMS:   mfvc1 - number of rows for mfv1    */
 /*           mfvc2 - number of rows for mfv2    */
 /*           nRowsSqr - row count square for    */
 /*                      all distinct values     */
 /*           uec - uec value                    */
 /*           mfvi - index of mfv value          */
 /*           newCount - row count of ditinct    */
 /*                      value                   */
 /*           newIndex - index of distinct value */
 /* RETCODE:  none                               */
 /************************************************/

 void updateMCMFV (Int32& mfvc1, Int32& mfvc2, double& nRowsSqr,
                   Int64& uec, Int32& mfvi, Int32 newCount, Int32 newIndex)
 {
     nRowsSqr += newCount*newCount;
     uec++;

     if (newCount > mfvc1)
     {
        mfvc2 = mfvc1;
        mfvc1 = newCount;
        mfvi = newIndex;
     }
     else if (newCount > mfvc2)
        mfvc2 = newCount;
 }

 /*************************************************/
 /* METHOD:   computeMCISuec                      */
 /* PURPOSE:  a fast path way of computing stats  */
 /*           for an MC                           */
 /* PARAMS:   mgroup  - MC to compute stats for   */
 /*           MCrows  - the data of the MC        */
 /*                     encapsulated by MCWrapper */
 /*                     objects                   */
 /*           allRows -   number of rows to       */
 /*                       process                 */
 /*           numIntervals - number if intervals  */
 /*           samplingUsed - is sampling used     */
 /*                          to create            */
 /* RETCODE:  none                                */
 /*************************************************/

 void computeMCISuec(HSColGroupStruct *mgroup, MCWrapper* MCrows, NABoolean samplingUsed,
                     Int32 allRows, Int32 numIntervals)
 {
    Int64 uec = 0;
    Int32 cnt = 1;
    double nRowsSqr = 0;

    HSLogMan *LM = HSLogMan::Instance();

    // most frequest values
    Int32 mfvc1 = 0;
    Int32 mfvc2 = 0;
    Int32 mfvi = 0;

    Int32 nulls = mgroup->nullCount;
    Int32 nRows = allRows - nulls;

    // with data already sorted, get the distinct values and compute
    // uec and MFV values
    if (nRows >  0)
    {
       Int32 i = 0;
       for (; i < nRows - 1; i++)
       {
          if (MCrows[i].index_ != MCrows[i+1].index_)
          {
            updateMCMFV (mfvc1, mfvc2, nRowsSqr, uec, mfvi, cnt,i);
            cnt = 0;
          }
          cnt++;
       }

       updateMCMFV (mfvc1, mfvc2, nRowsSqr, uec, mfvi, cnt,i);
    }

    // include nulls in MFV computation, if all nulls used the last row in the sorted list
    // as the MFV since nulls are sorted higher
    if (nulls)
    {
       updateMCMFV (mfvc1, mfvc2, nRowsSqr, uec, mfvi, nulls,allRows - 1);
    }

    if (LM->LogNeeded())
    {
       sprintf(LM->msg, "\tMC: using IS: allrowcount is (%d) null row count is (%d)"
                        " mfvc1 is (%d) mvfc2 is (%d) hv index is (%d) uec is (%ld)",
                        allRows, nulls, mfvc1, mfvc2, mfvi, uec);
       LM->Log(LM->msg);
    }

    // for MC the number of intervals is "1"
    Int32 intCount = 1;
    mgroup->groupHist = new(STMTHEAP) HSHistogram(intCount, nRows+MCrows->nullCount_, 0, 0, samplingUsed, FALSE);

    NAWString wStr = WIDE_("");
    myVarChar vc = myVarChar();
    vc.len = wStr.length()*2;
    memmove(vc.val, wStr.data(), vc.len);

    // create  one interval with dummy values
    mgroup->groupHist->addIntervalData(vc, mgroup, nulls, 0, 0, FALSE);

    mgroup->groupHist->setIntSquareSum(1, nRowsSqr);
    mgroup->groupHist->setIntRowCount(1, allRows);
    mgroup->groupHist->setIntMFVRowCount(1, mfvc1);
    mgroup->groupHist->setIntMFV2RowCount(1, mfvc2);
    mgroup->groupHist->setIntUec(1, uec);

    HSDataBuffer mfvBoundary;
    setBufferValue(MCrows[mfvi], mgroup, mfvBoundary);
    mgroup->groupHist->setIntMFVValue(1, mfvBoundary.data(), mfvBoundary.length());
 }

 /***********************************************/
 /* METHOD:  ComputeMCStatistics()              */
 /* PURPOSE: Estimates the UEC for multi-columns*/
 /* PARAMS:  usingIS - was this method called   */
 /*                    from the IS logic so IS  */
 /*                    can be attempted to      */
 /*                    compute MC stats         */
 /* RETCODE: 0 - successful                     */
 /* ASSUMPTIONS: For every multi-column group   */
 /*        there must be a corresponding SC     */
 /*        group. For example, For MC(A,B) the  */
 /*        corresponding SC(A) and SC(B) must   */
 /*        exist.                               */
 /*        If sampling was used, the SC uec and */
 /*        rowcount must have been extrapolated */
 /*        prior to calling this method.        */
 /***********************************************/
 Lng32 HSGlobalsClass::ComputeMCStatistics(NABoolean usingIS)
 {
     NABoolean collectMCSkewedValues = FALSE;
     Lng32 retcode = 0;
     HSColGroupStruct *mgroup = multiGroup;
     HSColumnStruct   *col;
     HSColGroupStruct *sgroup;
     NAWString low, hi;
     HSDataBuffer boundLo, boundHi;
     HSCursor *cursor;
     HSLogMan *LM = HSLogMan::Instance();

     while (mgroup != NULL)
     {
         // is this MC covered by columns in memory so IS can be used
         NABoolean coveredByIS = TRUE;

         // skip the group if we already processed it
         // or Internal Sort (IS) is used and this group cannot be used with IS
         if ((mgroup->state == PROCESSED) ||
             (usingIS && (mgroup->state == DONT_TRY)))
         {
            mgroup = mgroup->next;
            continue;
         }

         Lng32 colCount = mgroup->colCount;

         // Check if skewValuesCollected flag is set via syntax.
         collectMCSkewedValues = (mgroup->skewedValuesCollected ||
                          (CmpCommon::getDefault(USTAT_COLLECT_MC_SKEW_VALUES) == DF_ON));

         if(collectMCSkewedValues)
         {
           const Lng32 maxCharBoundaryLen = (Lng32) CmpCommon::getDefaultNumeric(USTAT_MAX_CHAR_BOUNDARY_LEN);
           Lng32 totalBoundaryLen = (HS_MAX_UCS_BOUNDARY_CHAR - (colCount -1) * 3);
           if(colCount * maxCharBoundaryLen >= totalBoundaryLen)
             collectMCSkewedValues = FALSE;

           // Dont collect skewed values if the column
           // group consists of a member of the decimal class, the interval class
           // or the float class, since these classes are not supported by SB.
           for (Int32 i=0; i<colCount; i++)
           {
             HSColumnStruct &col = mgroup->colSet[i];
             if (DFS2REC::isInterval(col.datatype) ||
                 DFS2REC::isFloat(col.datatype))
             {
               collectMCSkewedValues = FALSE;
               break;
             }
           }
           mgroup->skewedValuesCollected = collectMCSkewedValues;
         }

         if ( performISForMC() && usingIS)
         {
           // check if the data needed by all columns in the MC is already in
           // memory so IS can be used
           HSColGroupStruct *sgroup2;
           for (Int32 i=0; i<colCount; i++)
           {
              col = &mgroup->colSet[i];
              sgroup2 = findGroup(col->colnum);
              if ((sgroup2->state != PROCESSED) || (sgroup2->mcis_memFreed == TRUE))
              {
                coveredByIS = FALSE;
                break;
              }
           }
         }
         else
           coveredByIS = FALSE;

         // compute the MC stats using Internal Sort
         if (coveredByIS)
         {
             if (LM->LogNeeded())
             {
                char title[1000];
                sprintf(title, "MC: Compute using Internal Sort (%s) with%sskew",
                                mgroup->colNames->data(), collectMCSkewedValues? " " : " NO ");
                LM->StartTimer(title);
                (void)getTimeDiff(TRUE);
             }

             mgroup->state = PENDING;

             Int32 numRows = 0;
             //make sure that all columns have the same number of rows and get back this number
             NABoolean rowCountMatch = checkAllColsHaveSameNumOfRows(mgroup, numRows);

             // number of rows in each group should match
             HS_ASSERT(rowCountMatch);

             MCWrapper::setupMCIterators(mgroup, numRows);
             MCWrapper* tempData = newObjArr(MCWrapper, numRows);

             if (!tempData)
                throw ISMemAllocException();

             // are columns forming the MC nullable
             NABoolean allColsNullable = tempData->areAllMCColsNullable();

             // reset the number of nulls
             tempData->nullCount_ = 0;

             Int32 effectiveNumRows = 0;
             // setup the index value for the MCWrapper objects
             for (Int32 i =0; i < numRows; i++)
             {
                // MC null processing
                // increment the number of nulls if this is a null row
                // and skip it from the sort phase
                if (allColsNullable && tempData->areAllMCColsNull(i))
                {
                   tempData->nullCount_++;
                   // place the null rows at the end and don't sort them
                   tempData[numRows-tempData->nullCount_].setIndex(i);

                }
                else
                  tempData[effectiveNumRows++].setIndex(i);
             }

             mgroup->nullCount = tempData->nullCount_;

             if (LM->LogNeeded())
             {
                sprintf(LM->msg, "\tMC: After set up and null processing: total rows (%d) nulls (%d) non nulls (%d)",
                        numRows, tempData->nullCount_, effectiveNumRows);
                LM->Log(LM->msg);
             }

             // Initiate sort for specific type by calling the quicksort template function.
             recDepth = maxRecDepth = 0;

             quicksort((MCWrapper*)tempData, 0, effectiveNumRows-1);
             recDepth = maxRecDepth = 0;

             mgroup->data = tempData;
             mgroup->nextData = tempData + numRows;

             if (mgroup->skewedValuesCollected)
             {
                if (LM->LogNeeded())
                {
                   sprintf(LM->msg, "\tMC: generating histogram after Internal sort - skew values ARE collected");
                   LM->Log(LM->msg);
                }

                if (effectiveNumRows)
                  createHistogram(mgroup, intCount, numRows, samplingUsed, (MCWrapper*)NULL);
                else
                {
                  // If the column has all NULLs, then groupHist will not have been allocated
                  // in the call to CreateHistogram.  So, create it here.
                    mgroup->groupHist = new(STMTHEAP) HSHistogram(intCount,
                                                                  mgroup->nullCount,
                                                                  0, // numGapIntervals
                                                                  0, // numHighFreqIntervals
                                                                  samplingUsed,
                                                                  FALSE //singleIntervalPerUec
                                                                 );
                    mgroup->groupHist->addNullInterval(mgroup->nullCount, mgroup->colCount);
                    // NOTE: add a new method for MC so the MFV and nMFV are also captured
                }
             }
             else
             {
                if (LM->LogNeeded())
                {
                   sprintf(LM->msg, "\tMC: generating histogram after Internal sort - skew values are NOT collected");
                   LM->Log(LM->msg);
                }

                // temporary CQD used for testing to make sure that fast path stats computation is
                // correct - should remove after validation
                if ( performISForMC() )
                   computeMCISuec (mgroup, tempData, samplingUsed, numRows, intCount);
                else
                {
                   createHistogram(mgroup, 1, numRows, samplingUsed, (MCWrapper*)NULL);
                }
             }

             // free up memory used by the MC group to support IS
             tempData->freeColsMem();
             delObjArr(tempData, MCWrapper);

             // free-up the single col memory
             HSColGroupStruct *sgroup2;
             for (Int32 i=0; i<mgroup->colCount; i++)
             {
                HSColumnStruct &col = mgroup->colSet[i];
                sgroup2 = findGroup(col.colnum);
                sgroup2->mcs_usingme--;
                if (sgroup2->mcs_usingme <= 0)
                   sgroup2->freeISMemory(TRUE, TRUE);
             }

             // flag the group as processed by IS
             mgroup->state = PROCESSED;
             mgroup->mcis_groupWeight.clear();

             if (LM->LogNeeded())
             {
                LM->StopTimer();
             }

             // done computing MC stats using IS
         }
         else if (!usingIS) // regular path to compute stats using SQL
         {
             if (LM->LogNeeded())
             {
                char title[1000];
                sprintf(title, "MC: Compute using SQL (%s) with%sskew",
                                mgroup->colNames->data(), collectMCSkewedValues? " " : " NO ");
                LM->StartTimer(title);
                (void)getTimeDiff(TRUE);
             }

             NAString columnName = "", mgroupColNames = "",dblQuote="\"";;
 	    for (Int32 i=0; i<mgroup->colCount; i++)
 	    {
 	      HSColumnStruct &col = mgroup->colSet[i];
 	      columnName = ToAnsiIdentifier(col.colname->data());
 	      // Surround column name with double quotes, if not already delimited.
 	      if (columnName.data()[0] != '"')
 	        columnName=dblQuote+columnName+dblQuote;
 	      mgroupColNames.append(columnName);
 	      if(i < colCount-1)
 	        mgroupColNames.append(",");
 	    }

             /*=================================*/
             /*   CALCULATE MULTI-COLUMN UEC    */
             /*=================================*/
             mgroup->clistr->append("SELECT FMTVAL, SUMVAL FROM (SELECT ");
             mgroup->clistr->append(mgroupColNames);
             if(collectMCSkewedValues)
             {
               mgroup->clistr->append(mgroup->generateTextForColumnCast());
               mgroup->clistr->append(", COUNT(*) FROM ");
             }
             else
               mgroup->clistr->append(", _ucs2'unused', COUNT(*) FROM ");
             mgroup->clistr->append(hssample_table->data());

             Int64 hintRowCount = 0;
             if (sampleTableUsed)
             {
               hintRowCount = sampleRowCount;
             }
             else
             {
               hintRowCount = actualRowCount;
             }

             char cardHint[50];
             sprintf(cardHint, " <<+ cardinality %e >> ", (double)hintRowCount);
             mgroup->clistr->append(cardHint);

             if (samplingUsed && !sampleTableUsed)
                mgroup->clistr->append(sampleOption->data());

             mgroup->clistr->append(" GROUP BY ");
             mgroup->clistr->append(mgroupColNames);
             mgroup->clistr->append(" FOR READ UNCOMMITTED ACCESS) T(");
             mgroup->clistr->append(mgroupColNames);
             mgroup->clistr->append(", FMTVAL, SUMVAL)");
             if(collectMCSkewedValues)
             {
               mgroup->clistr->append(" ORDER BY ");
               mgroup->clistr->append(mgroupColNames);
             }

             cursor = new(STMTHEAP) HSCursor;
             retcode = cursor->fetchBoundaries(mgroup,
                                               sampleRowCount,
                                               intCount,
                                               samplingUsed);
             mgroup->colSecs = getTimeDiff();
             if (LM->LogNeeded())
             {
                LM->StopTimer();
             }
             delete cursor;
             HSHandleError(retcode);

         }

         // Determine boundary values based on single column histograms for MC Group
         // if actual boundary values were not collected.
         if(((!usingIS) || (coveredByIS)) && !collectMCSkewedValues)
         {
                                                  /*==============================*/
                                                  /*= DETERMINE BOUNDARY VALUES  =*/
                                                  /*==============================*/
             low = WIDE_("");
             hi  = WIDE_("");
             NABoolean lowIsFull = FALSE;
             NABoolean hiIsFull = FALSE;
             for (Int32 i=0; i<mgroup->colCount && !(lowIsFull && hiIsFull); i++)
             {
                col = &mgroup->colSet[i];

                sgroup = findGroup(col->colnum);
                HS_ASSERT(sgroup != NULL);
                retcode = sgroup->groupHist->getLowValue(boundLo, FALSE);
                HSHandleError(retcode);

                retcode = sgroup->groupHist->getHighValue(boundHi, FALSE);
                HSHandleError(retcode);

                //if BigNum, cast to double
                //because a boundary can have only 250 characters
                //and it is not large enough to save 2 BigNum numbers with 128 precision.
                //each double precision number needs only 25 characters.
                if (DFS2REC::isBigNum(sgroup->colSet[0].datatype))
                {
                  myVarChar vc = myVarChar();
                  vc.len = boundLo.length();
                  memmove(vc.val, boundLo.data(), vc.len);
                  retcode = doubleToHSDataBuffer(ucsToDouble(&vc), boundLo);
                  HSHandleError(retcode);

                  vc.len = boundHi.length();
                  memmove(vc.val, boundHi.data(), vc.len);
                  retcode = doubleToHSDataBuffer(ucsToDouble(&vc), boundHi);
                  HSHandleError(retcode);
                }

                //10-031023-0696: make sure that the final boundary value is within
                //the length constraint of the HISTOGRAM tables. Three extra
                //characters must be considered:
                //               1 - comma separator
                //               2 - open and close parenthesis
                if (!lowIsFull && ((low.length() + boundLo.numChars() + 3) < HS_MAX_UCS_BOUNDARY_CHAR))
                {
                    low.append((NAWchar*)boundLo.data(), boundLo.length() / sizeof(NAWchar));
                    low.append(WIDE_(","));
                }
                else
                   lowIsFull = TRUE;
                if (!hiIsFull && ((hi.length() + boundHi.numChars() + 3) < HS_MAX_UCS_BOUNDARY_CHAR))
                {
                    hi.append((NAWchar*)boundHi.data(), boundHi.length() / sizeof(NAWchar));
                    hi.append(WIDE_(","));
                }
                else
                   hiIsFull = TRUE;

             } //end for: column in group

             //remove extra comma separator between column names and copy result
             //to low and high boundaries
             low.remove(low.length() - 1);
             boundLo = low.data();
             hi.remove(hi.length() - 1);
             boundHi = hi.data();

             HS_ASSERT(mgroup->groupHist != NULL);
             retcode = mgroup->groupHist->updateMCInterval(boundLo, boundHi);
             HSHandleError(retcode);
         }

         mgroup = mgroup->next;
     }//end while: group

     return retcode;
 }


 /***********************************************/
 /* METHOD:  FixSamplingCounts()                */
 /* PURPOSE: When sampling is used, this        */
 /*          method is called to estimate the   */
 /*          UEC for intervals and histograms.  */
 /* RETCODE: 0 - successful                     */
 /* PROCESS: Basically, the UEC and ROWCOUNT of */
 /*          every interval is increase by some */
 /*          extrapolated value. There are two  */
 /*          extrapolation methods that are used*/
 /*          linear and root extrapolation.     */
 /*          Newton-Raphson Method is used for  */
 /*          root extrapolation.                */
 /***********************************************/
 Lng32 HSGlobalsClass::FixSamplingCounts(HSColGroupStruct *group)
   {
     Lng32 retcode = 0;
     Lng32 i, j;
     Lng32 lastInterval;
     Int64 preSumRC, preSumUec, postSumRC, postSumUec, rowAdj, rows, uec;
     Lng32 intJoinCount = 0;
     Lng32 start = -1;
     double combRows  = 0, combUec   = 0, combUecRatio;
     double estRow    = 0, estUec    = 0;
     double estSubRow = 0, estSubUec = 0;
     double lower     = 0;
     const double UEC_FRACTION_UPPER = 0.975;
     const double FRACTION_HIGH = 0.981;
     const double UPSCALE_FOR_ROWS = convertInt64ToDouble(actualRowCount) / sampleRowCount;
     const Lng32 MAX_INTERVAL_JOIN = 4;
     NABoolean processMultiGroups = TRUE;

     HSHistogram *groupHist = NULL;
     char rowCountStr[30];
     char uecStr[30];
     HSLogMan *LM = HSLogMan::Instance();

     // get defaults that tell us which estimator to use, and
     // the max Dsh parameter for the LWC estimator
     //
     NABoolean useLWCEstimator =
       (CmpCommon::getDefault(USTAT_FORCE_MOM_ESTIMATOR) == DF_OFF);
     double DshMax = CmpCommon::getDefaultNumeric(USTAT_DSHMAX);

     if (LM->LogNeeded())
       {
         LM->Log("\nFixing sampling counts");

         if (useLWCEstimator)
           {
             sprintf(LM->msg, "Using LWC estimator, DshMax %5.1f", DshMax);
             LM->Log(LM->msg);
             LM->Log("Logging both MOM and LWC UEC estimates\n");
           }
         else
           {
             LM->Log("Using MOM estimator");
             LM->Log("Logging only MOM UEC estimates\n");
           }
       }

         groupHist = group->groupHist;
         HS_ASSERT(groupHist != NULL);

         double uecMomTot=0, uecLwcTot=0, uecUjTot=0, uecShTot=0, coeffOfVarTot=0;

         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "Estimating UEC for column group [%s]\n",
                     group->colNames->data());
             LM->Log(LM->msg);
             LM->Log("Interval(s)    Dlwc        Dsh        Duj    CofV         Dmom");
             LM->Log("-----------  ---------  ---------  --------- ---- --------------------");
           }

         lastInterval = groupHist->getNumIntervals();
         groupHist->getTotalCounts(preSumRC, preSumUec);

         Int32 estimateCnt=0; // Maintain count of times that UEC was estimated.  This
                            // count is used to obtain an average skew (CV) across the
                            // entire histogram.

         for (i=1; i <= lastInterval; i++) {
             groupHist->setIntOrigRC(i, groupHist->getIntRowCount(i));
             groupHist->setIntOrigMFV(i, groupHist->getIntMFVRowCount(i));
         }

         for (i=1; i <= lastInterval; i++)
           {
             // -----------------------------------------------------------------
             // Loop through the intervals for a histogram.  When a sampled interval
             // has UEC > (UEC_FRACTION_UPPER * # rows in sample interval), perform
             // UEC estimation across multiple intervals.  Continue combining intervals
             // while this is true AND the # of intervals combined has not exceeded
             // MAX_INTERVAL_JOIN.
             // -----------------------------------------------------------------

             groupHist->setIntOrigUec(i, groupHist->getIntUec(i));

             combRows += convertInt64ToDouble(groupHist->getIntRowCount(i)); // Not upscaled.
             combUec  += convertInt64ToDouble(groupHist->getIntUec(i));
             intJoinCount++;

             // Scale up all rowcounts for this interval. For most frequent
             // values, don't scale up if UEC = row count.
             if (groupHist->getIntUec(i) != groupHist->getIntRowCount(i)) {
               groupHist->setIntMFVRowCount(i, (Int64)((double)groupHist->getIntMFVRowCount(i)*UPSCALE_FOR_ROWS));
               groupHist->setIntMFV2RowCount(i, (Int64)((double)groupHist->getIntMFV2RowCount(i)*UPSCALE_FOR_ROWS));
             }

             // Attempt to combine intervals for UEC estimation.
             // If possible, then will immediately loop back.
             if ((i < (lastInterval - 1)) &&
                 (intJoinCount < MAX_INTERVAL_JOIN) &&
                 (combUec >= (UEC_FRACTION_UPPER * combRows)))
               {
                 if (start < 1)
                   start = i;
                 continue;
               }

             // If the interval after this one is the last non-null interval,
             // and that interval has UEC > (UEC_FRACTION_UPPER *
             // # rows in sample interval), then combine this one with that one
             // (and we'll ignore the MAX_INTERVAL_JOIN limit for that one).
             if ( ( ((i == lastInterval - 2) && groupHist->hasNullInterval()) ||
                    ((i == lastInterval - 1) && !groupHist->hasNullInterval())  ) &&
                  (groupHist->getIntUec(i+1) > UEC_FRACTION_UPPER * groupHist->getIntRowCount(i+1))  )
               {
                 if (start < 1)
                   start = i;
                 continue;
               }

             intJoinCount = 0;

             // Cannot combine any more intervals, adjust current interval with calculated ratio.
             combUecRatio = combUec / combRows;
             estRow = UPSCALE_FOR_ROWS * combRows;

             // -----------------------------------------------------------------
             // SPECIAL CASE: Processing the interval that contains NULLs
             //               Do not adjust UEC
             // -----------------------------------------------------------------
             if (i == lastInterval && groupHist->hasNullInterval())
               {
                 groupHist->setIntRowCount(i, (Int64)estRow);

                 if (LM->LogNeeded())
                   {
                     // log UEC estimate for this interval.  the uec estimate in
                     // the full table interval is the same as the uec in the
                     // sample interval, because this is the special last interval
                     // that contains only null values.  in other words, if we found
                     // one unique value (NULL) in the sample, we expect one unique
                     // value (NULL) to be in the full table as well.
                     //
                     sprintf(LM->msg, "   %5d    (last interval) contains NULLs, not changing UEC", i);
                     LM->Log(LM->msg);
                   }
               }
             else // Not processing the last interval as a NULL interval.
                  // The general case.
               {
                 double Duj=0;
                 double Dsh=0;
                 double Dlwc=0;
                 double Dmom=0;
                 double intCoeffOfVar=0;

                 NABoolean DlwcAvailable = false;
                 NABoolean DmomAvailable = false;
                 NABoolean DmomEstCalled = false;

                 estimateCnt++;

                 // Estimate the UEC for this set of intervals.  There are 2 cases:
                 //  1. UEC of sample < sampled rows in set of intervals.
                 //  2. UEC of sample = sampled rows in set of intervals.
                 if (combUec < combRows)
                   {
                     // If the total sampled UEC in this set of intervals is not equal
                     // to the sampled number of rows for these intervals:
                     // estimate the number of distinct values in the entire table for
                     // the current interval (or intervals), based on the row count and
                     // number of distinct values in the current sample interval(s).

                     if ((LM->LogNeeded() || useLWCEstimator) && groupHist->fi(0))
                       {
                         // Use a UEC estimator that is a linear weighted combination
                         // of the unsmoothed jackknife and Shlosser methods.
                         //
                         FrequencyCounts *f;

                         if (start < 1)
                           // single interval estimation; use interval i's fi counts
                           //
                           f = groupHist->fi(i);
                         else
                           {
                             // multiple interval estimation; merge fi counts
                             // of intervals start..i into work fi (f0)
                             //
                             f = groupHist->fi(0);
                             f->reset();
                             for (j=start; j<=i; j++)
                               groupHist->fi(j)->mergeTo(*f);
                           }

                         estUec = lwcUecEstimate(combUec, combRows, estRow, f,
                                                 DshMax, intCoeffOfVar, Duj, Dsh);

                         Dlwc = estUec;
                         DlwcAvailable = true;
                         uecLwcTot += estUec;
                         uecUjTot += Duj;
                         uecShTot += Dsh;
                         coeffOfVarTot += intCoeffOfVar;
                       }

                     if (LM->LogNeeded() || !useLWCEstimator)
                       {
                         estUec = (estRow / combRows) * combUec;

                         //Experimentation using TPCD2X ORDERS and LINEITEM tables,
                         //tells us that if the UEC ratio is between 0.1 and 0.5, we
                         //should NOT call xValue() to get the root value. It will
                         //produce very low UECs. We have added two new CQD for
                         //flexibility.
                         if (combUecRatio >= CmpCommon::getDefaultNumeric(USTAT_UEC_HI_RATIO) ||
                             combUecRatio <= CmpCommon::getDefaultNumeric(USTAT_UEC_LOW_RATIO))
                           {
                             lower  = xValue(combUec, combRows);
                             DmomEstCalled = true;

                             if (combUec <= (combRows * FRACTION_HIGH) &&
                                 lower < estUec)
                               estUec = lower;
                           }
                         uecMomTot += estUec;

                         Dmom = estUec;
                         DmomAvailable = true;
                       }

                     // reset estUec to Dlwc, if Dlwc is available and we are
                     // supposed to use it
                     //
                     if (DlwcAvailable && useLWCEstimator)
                       estUec = Dlwc;

                     if (LM->LogNeeded())
                       {
                         //
                         // log UEC estimates for this interval, or intervals.
                         // for the MOM estimator, also indicate whether we actually
                         // called the estimator (denoted "mom est" in the output),
                         // or if we just scaled up the sample UEC by the sample
                         // fraction (denoted "scaleup" in the output).
                         //
                         char DlwcStr[16], DshStr[16], DujStr[16];
                         if (DlwcAvailable)
                           {
                             sprintf(DlwcStr,"%10.0f",Dlwc);
                             sprintf(DshStr,"%10.0f",Dsh);
                             sprintf(DujStr,"%10.0f",Duj);
                           }
                         else
                           {
                             sprintf(DlwcStr,"%10s"," ");
                             sprintf(DshStr,"%10s"," ");
                             sprintf(DujStr,"%10s"," ");
                           }
                         char DmomStr[32];
                         if (DmomAvailable)
                           sprintf(DmomStr,"%10.0f (%s)",Dmom,
                                   DmomEstCalled ? "mom est" : "scaleup");
                         else
                           sprintf(DmomStr,"%10s"," ");

                         if (start < 1)
                           sprintf(LM->msg, "   %5d    %10s %10s %10s %4.2f %10s",
                                   i, DlwcStr, DshStr, DujStr, intCoeffOfVar, DmomStr);
                         else
                           sprintf(LM->msg, "   %2d-%2d    %10s %10s %10s %4.2f %10s",
                                   start, i, DlwcStr, DshStr, DujStr, intCoeffOfVar, DmomStr);
                         LM->Log(LM->msg);
                       }
                   } // End UEC of sample != rows of sample
                 else
                   {
                     // UEC of sample = rows of sample
                     estUec = estRow;
                     lower  = estRow;

                     uecLwcTot += estUec;
                     uecMomTot += estUec;

                     if (LM->LogNeeded())
                       {
                         //
                         // log UEC estimates for this interval, or intervals. the
                         // estimation that is done here involves setting the estimated
                         // UEC in the full table to the estimated rowCount in the full
                         // table, because the uec in the sample for this interval was
                         // the same as the row count in this sample interval.  in other
                         // words, the values in the sample interval were unique, so we
                         // conclude that the values in the full table interval will be
                         // unique as well
                         //
                         if (start < 1)
                           sprintf(LM->msg, "   %5d    uec==rc, no est; uec: %10.0f",
                                   i, estUec);
                         else
                           sprintf(LM->msg, "   %2d-%2d    uec==rc, no est; uec: %10.0f",
                                   start, i, estUec);
                         LM->Log(LM->msg);
                       }
                   }
                                               /*==============================*/
                                               /*    ADJUST SINGLE INTERVAL    */
                                               /*==============================*/

                 if (start < 1)
                   {
                     if (estUec > estRow)
                       {
                         estUec = MINOF(estRow, convertInt64ToDouble(LLONG_MAX));
                         if (LM->LogNeeded())
                           {
                             sprintf(LM->msg, "            Adjusted UEC (estUec >estRow): %4.0f", estUec);
                             LM->Log(LM->msg);
                           }
                       }

                     // Adjust MFV and 2MFV for intervals where the original RC and UEC are the same.
                     // Both MFV and 2MFV have been adjusted at the beginning of the loop when the original
                     // RC and UEC are not the same.
                     if ( groupHist->getIntOrigRC(i) ==  groupHist->getIntOrigUec(i) )
                       groupHist->adjustMFVand2MFV(i, estRow, estUec);

                     groupHist->setIntRowCount(i, (Int64)estRow);
                     groupHist->setIntUec(i, (Int64)estUec);

                   }
                                               /*==============================*/
                                               /*  ADJUST MULTIPLE INTERVALS   */
                                               /*==============================*/
                 else
                   {
                     for (j = start; j <= i; j++)
                       {
                         estSubRow = (estRow / combRows) * convertInt64ToDouble(groupHist->getIntRowCount(j));
                         estSubUec = (estUec / combUec) * convertInt64ToDouble(groupHist->getIntUec(j));

                         if (estSubUec > estSubRow)
                           estSubUec = MINOF(estSubRow, convertInt64ToDouble(LLONG_MAX));

                         if (LM->LogNeeded())
                           {
                             sprintf(LM->msg, "            Adjusted UEC interval %2d: %4.0f", j, estSubUec);
                             LM->Log(LM->msg);
                           }

                         if ( groupHist->getIntOrigRC(j) ==  groupHist->getIntOrigUec(j))
                           groupHist->adjustMFVand2MFV(j, estSubRow, estSubUec);

                         groupHist->setIntRowCount(j, (Int64)estSubRow);
                         groupHist->setIntUec(j, (Int64)estSubUec);
                       }
                   }
               } // End processing interval UEC
             start = -1;
             combUec = combRows = 0;
           } // END LOOP -- for (i=1; i <= lastInterval; i++)

         if (estimateCnt != 0)
           group->coeffOfVar = coeffOfVarTot / estimateCnt; // Assign the coeffOfVar for this histogram
                                                            // now that all intervals have been processed.
         if (LM->LogNeeded())
           {
             char coeffOfVarStr[16],coeffOfVarTotStr[16];
             sprintf(coeffOfVarStr,"%10.0f",group->coeffOfVar);
             sprintf(coeffOfVarTotStr,"%10.0f",coeffOfVarTot);
             sprintf(LM->msg, "\tAverage CofV for column: %s = %s/%d (groups)",
                     coeffOfVarStr, coeffOfVarTotStr, estimateCnt);
             LM->Log(LM->msg);
           }
                                               /*==============================*/
                                               /*  ROUNDING ERROR ADJUSTMENTS  */
                                               /*==============================*/
         groupHist->getTotalCounts(postSumRC, postSumUec); /* new totals       */
         rowAdj = actualRowCount - postSumRC;
         if (rowAdj != 0)
           {
             // Adjust intervals from loInt to hiInt.  Intervals are numbered 1 to lastInterval.
             //   When (lastInterval == 1 or 2) then (loInt,hiInt) = (1, 1)
             //   When (lastInterval >  2)      then (loInt,hiInt) = (2, lastInterval-1)
             Int32 loInt = (lastInterval <= 2) ? 1 : 2;               // Set to 1 if 1 or 2 intervals.
             Int32 hiInt = (lastInterval <= 2) ? 1 : lastInterval-1;  // Set to 1 if 1 or 2 intervals.

             // Check to see if adjusting all intervals within the range and with >=10000 rows
             // up by 1 row will accommodate all of rowAdj.  If more intervals are required,
             // intervals with >=1000 rows are checked.  This continues for no more than 5 passes
             // down to intervals with >=1 row.  Once this limit is chosen, a final pass uses
             // the limit to adjust the intervals.
             // UEC adjustment occurs only if UEC = rowcount and UEC != 1.  The check for UEC != 1
             // will hopefully avoid an invalid situation e.g. UEC=2 and boundary="(X)",
             // where previous boundary="(X-1)".
             // Since the first and last intervals can be columns used often for join predicates,
             // they will only be used if total # intervals < 3.
             Int32   limit = 10000;  // # of rows required for an interval to be updated.
             Int64 ints  = 0;      // Count of intervals to update.
             while (limit > 1)
               {
                 for (i=loInt; i<=hiInt; i++) if (groupHist->getIntRowCount(i)>=limit) ints++;
                 if (ints >= rowAdj) break;
                 limit /= 10;
                 ints = 0;
               }

             // NOTE: The while loop attempts to ensure that the final row adjustment is correct.
             ints = 0;
             NABoolean assigned = TRUE; // flag to avoid infinite loop.
             while (ints<rowAdj && assigned)
               {
                 assigned = FALSE;
                 for (i=loInt; i<=hiInt && ints<rowAdj; i++)
                   if ((rows=groupHist->getIntRowCount(i)) >= limit)
                     {
                       groupHist->setIntRowCount(i, rows+1);
                       if ((uec=groupHist->getIntUec(i)) == rows && uec != 1) groupHist->setIntUec(i, uec+1);
                       ints++;
                       assigned = TRUE;
                     }
               }

             if (LM->LogNeeded())
               {
                 sprintf(LM->msg, "\tRounding error was " PF64 " rows.  Added " PF64 " rows "
                         "from interval " "%d to %d.",
                                  rowAdj, ints, loInt, hiInt);
                 LM->Log(LM->msg);
                 sprintf(LM->msg, "\tUEC values adjusted if UEC=rows and UEC not 1.");
                 LM->Log(LM->msg);
               }
           }

         if (LM->LogNeeded())
           {
             convertInt64ToAscii(actualRowCount, rowCountStr);
             convertInt64ToAscii(sampleRowCount, uecStr);
             sprintf(LM->msg, "\t[%s] ESTIMATED ROWCOUNT & UEC (ROWCOUNT=%s SAMPLED=%s)", group->colNames->data(), rowCountStr, uecStr);
             LM->Log(LM->msg);
             convertInt64ToAscii(preSumRC, rowCountStr);
             convertInt64ToAscii(preSumUec, uecStr);
             sprintf(LM->msg,"\t\t\tBEFORE: rowcount = %s, uec = %s", rowCountStr, uecStr);
             LM->Log(LM->msg);
             groupHist->getTotalCounts(postSumRC, postSumUec); /* reset - possible adjustment */
             convertInt64ToAscii(postSumRC, rowCountStr);
             convertInt64ToAscii(postSumUec, uecStr);
             sprintf(LM->msg,"\t\t\tAFTER:  rowcount = %s, uec = %s", rowCountStr, uecStr);
             LM->Log(LM->msg);
             sprintf(LM->msg,"\t\t\tLWC:    uec = %4.0f", uecLwcTot);
             LM->Log(LM->msg);
             sprintf(LM->msg,"\t\t\tUJ:     uec = %4.0f", uecUjTot);
             LM->Log(LM->msg);
             sprintf(LM->msg,"\t\t\tSh:     uec = %4.0f", uecShTot);
             LM->Log(LM->msg);
             sprintf(LM->msg,"\t\t\tMOM:    uec = %4.0f", uecMomTot);
             LM->Log(LM->msg);
           }

     checkTime("after fixing sampling counts and performing estimation");
     return retcode;
   }


 /***********************************************/
 /* METHOD:  ClearAllHistograms()               */
 /* PURPOSE: Delete all histograms that were    */
 /*          generated for base table           */
 /* RETCODE:  0 - successful                    */
 /*          -1 - failure                       */
 /* DEPENDENCY: Pre-compiled statements in      */
 /*             SQLHIST.mdf                     */
 /* ASSUMPTIONS: A transaction has already been */
 /*              started.                       */
 /***********************************************/
 // Alternate function definitions -- first for static
 // query, then for dynamic query.
 #ifdef NA_USTAT_USE_STATIC  // use static query defined in module file
 Lng32 HSGlobalsClass::ClearAllHistograms()
   {
     Lng32 retcode = 0;
     char UIDstr[30];
     Int64 xObjectUID = objDef->getObjectUID();
     convertInt64ToAscii(xObjectUID, UIDstr);
     HSCliStatement::statementIndex stmt;
     HSLogMan *LM = HSLogMan::Instance();

     if (LM->LogNeeded())
       {
         sprintf(LM->msg, "\t\t\tDELETE101 (%s, %s)", hstogram_table->data(), UIDstr);
         LM->Log(LM->msg);
       }

     if (tableFormat == SQLMP)
       stmt = HSCliStatement::DELETE101_MP;
     else
       if (HSGlobalsClass::schemaVersion >= COM_VERS_2300)
         stmt = HSCliStatement::DELETE101_MX_2300;
       else
         stmt = HSCliStatement::DELETE101_MX;

     HSCliStatement delete101(stmt,
                              (char *)hstogram_table->data(),
                              (char *)&xObjectUID);
     retcode = delete101.execFetch("DELETE101()");
     HSHandleError(retcode);

     if (LM->LogNeeded())
       {
         sprintf(LM->msg, "\t\t\tDELETE201 (%s, %s) via DELETE201_MX_2300", hsintval_table->data(), UIDstr);
         LM->Log(LM->msg);
       }

     if (tableFormat == SQLMP)
       stmt = HSCliStatement::DELETE201_MP;
     else
       if (HSGlobalsClass::schemaVersion >= COM_VERS_2300)
         stmt = HSCliStatement::DELETE201_MX_2300;
       else
         stmt = HSCliStatement::DELETE201_MX;

     HSCliStatement delete201(stmt,
                              (char *)hsintval_table->data(),
                              (char *)&xObjectUID);
     retcode = delete201.execFetch("DELETE201()");
     HSHandleError(retcode);

     return retcode;
   }

 #else // NA_USTAT_USE_STATIC not defined, define version of function to use dynamic query

 Lng32 HSGlobalsClass::ClearAllHistograms()
   {
     HS_ASSERT(schemaVersion >= COM_VERS_2300);

     HSLogMan *LM = HSLogMan::Instance();
     Lng32 retcode = 0;
     char UIDstr[30];
     convertInt64ToAscii(objDef->getObjectUID(), UIDstr);

     // Delete from Histograms table.

     NAString stmtText = "DELETE FROM ";
     stmtText.append(hstogram_table->data())
             .append(" WHERE TABLE_UID = ")
             .append(UIDstr);

     retcode = HSFuncExecQuery(stmtText.data());
     HSHandleError(retcode);
     if (LM->LogNeeded())
       {
         sprintf(LM->msg, "\t\t\tDELETE101 (%s, %s) via dynamic query", hstogram_table->data(), UIDstr);
         LM->Log(LM->msg);
       }

     // Delete from Histogram_Intervals table.

     stmtText = "DELETE FROM ";
     stmtText.append(hsintval_table->data())
             .append(" WHERE TABLE_UID = ")
             .append(UIDstr);

     retcode = HSFuncExecQuery(stmtText.data());
     HSHandleError(retcode);
     if (LM->LogNeeded())
       {
         sprintf(LM->msg, "\t\t\tDELETE201 (%s, %s) via dynamic query", hsintval_table->data(), UIDstr);
         LM->Log(LM->msg);
       }

     return retcode;
   }

 #endif // NA_USTAT_USE_STATIC not defined

 /***********************************************/
 /* METHOD:  ClearSelectHistograms()            */
 /* PURPOSE: Delete user-selected histograms    */
 /*          that were generated for base table */
 /* RETCODE:  0 - successful                    */
 /*          -1 - failure                       */
 /* ASSUMPTIONS: A transaction has already been */
 /*              started.                       */
 /***********************************************/
 Lng32 HSGlobalsClass::ClearSelectHistograms()
   {
     Lng32     retcode = 0;
     Int64    xRows = 0;
     NAString oldHistList;
     NAString stmt;
     char     tempStr[30];
     char     uidStr[30];
     HSColGroupStruct* group;
     NABoolean processMultiGroups;
     HSLogMan *LM = HSLogMan::Instance();

     convertInt64ToAscii(objDef->getObjectUID(), uidStr);

     if (singleGroup)
       {
         group = singleGroup;                      /* process single-columns  */
         processMultiGroups = TRUE;                /* then multi-columns      */
       }
     else
       {
         group = multiGroup;                       /* process multi-columns   */
         processMultiGroups = FALSE;               /* only once               */
       }

     while (group != NULL)
       {
         if (group->oldHistid != 0)
           {
                                           /*=================================*/
                                           /*     DETERMINE OLD HISTOGRAM     */
                                           /*             VALUES              */
                                           /*=================================*/
             sprintf(tempStr, " %u,", group->oldHistid);
             oldHistList += tempStr;
             if (group->oldHistidList != "")
               oldHistList += group->oldHistidList;
           }
         group = group->next;
         if (group == NULL && processMultiGroups)
           {
             processMultiGroups = FALSE;
             group = multiGroup;
           }
       }

     retcode = HSGlobalsClass::removeHists(oldHistList, uidStr, "CLEAR HISTOGRAMS");
     return retcode;
   }

 /***********************************************/
 /* METHOD:  DeleteOrphanHistograms()           */
 /* PURPOSE: Deletes obsolete histograms that   */
 /*          may exist. For SQL/MP tables,      */
 /*          histograms are not automatically   */
 /*          deleted when table is dropped.     */
 /* RETCODE:  0 - successful                    */
 /*           all error are ignored because it  */
 /*           does not hurt or corrupt anything */
 /*           if these obsolete histograms exist*/
 /* ASSUMPTIONS: A transaction has already been */
 /*              started.                       */
 /***********************************************/
 Lng32 HSGlobalsClass::DeleteOrphanHistograms()
   {
     Lng32     retcode = 0;
     Int64    rows = 0;
     NAString query;
     char     rowCountStr[30];
     HSLogMan *LM = HSLogMan::Instance();

     if (tableFormat == SQLMP)
       {
         query = "DELETE FROM ";
         query += hstogram_table->data();
         query += " WHERE TABLE_UID NOT IN (SELECT CREATETIME FROM ";
         query += objDef->getCatalogLoc(HSTableDef::EXTERNAL_FORMAT);
         query += ".TABLES)";
         retcode = HSFuncExecTransactionalQueryWithRetry
           (query, -UERR_INTERNAL_ERROR, &rows, "CLEAR_ORPHANS",
            NULL, NULL);
         if (LM->LogNeeded())
           {
             convertInt64ToAscii(rows, rowCountStr);
             sprintf(LM->msg, "\t\t\t%s ROWS DELETED FROM %s", rowCountStr, hstogram_table->data());
             LM->Log(LM->msg);
           }

         query = "DELETE FROM ";
         query += hsintval_table->data();
         query += " WHERE TABLE_UID NOT IN (SELECT CREATETIME FROM ";
         query += objDef->getCatalogLoc(HSTableDef::EXTERNAL_FORMAT);
         query += ".TABLES)";
         retcode = HSFuncExecTransactionalQueryWithRetry
           (query, -UERR_INTERNAL_ERROR, &rows, "CLEAR_ORPHANS",
            NULL, NULL);
         if (LM->LogNeeded())
           {
             convertInt64ToAscii(rows, rowCountStr);
             sprintf(LM->msg, "\t\t\t%s ROWS DELETED FROM %s", rowCountStr, hsintval_table->data());
             LM->Log(LM->msg);
           }
       }
     return 0;
   }

 /***********************************************/
 /* METHOD:  GetStatistics()                    */
 /* PURPOSE:                                    */
 /*                                             */
 /*                                             */
 /* RETCODE:  0 - successful                    */
 /*          -1 - failure                       */
 /* ASSUMPTIONS: Parser has set all options     */
 /*    and set up the singleGroup and           */
 /*    multiGroup lists.                        */
 /*                                             */
 /***********************************************/
 // General Logic:
 //
 // The parser has set up the lists singleGroup and multiGroup.
 // Those contain the names of all of the histograms by column name
 // that were just requested.  The column name(s) will be taken from
 // each entry.  The table group list will be walked looking for matching
 // names.  Once the name is found the histogram id will be taken from the
 // oldHistid entry.
 //
 // That histogram id will be used with a query to get the histogram data
 // for that id.

 Lng32 HSGlobalsClass::GetStatistics(NAString& displayData, Space& space)
 {
     Lng32 retcode = 0;
     NABoolean gotOne = false;

     HSColGroupStruct *tableGroupList;
     HSColGroupStruct *tableGroup;
     HSColGroupStruct *listedGroup;
     retcode = groupListFromTable(tableGroupList); // Reads from HISTOGRAMS table
                                                   // for SERIALIZABLE ACCESS.
     HSHandleError(retcode);
                                           /*=================================*/
                                           /*   WRITE TABLE ID ONCE FOR       */
                                           /*   FOR EACH SHOWSTATS COMMAND    */
                                           /*=================================*/
     if (optFlags & DETAIL_OPT)
        displayData += "Detailed ";
     displayData += "Histogram data for Table ";
     displayData += user_table->data();

     char uidAsChar[20];
     Int64 objID = objDef->getObjectUID();
     convertInt64ToAscii(objID, uidAsChar);
     displayData += "\nTable ID: ";displayData.append(uidAsChar);
     displayData += "\n";

     if (!(optFlags & DETAIL_OPT)){
         displayData += "\n   Hist ID # Ints    Rowcount         UEC Colname(s)";
         displayData += "\n========== ====== =========== =========== ===========================\n";
     }

 	                                      /*=================================*/
 	                                      /*   If individual histograms      */
 	                                      /*   were requested then they are  */
 	                                      /*   listed in singleGroup and     */
 	                                      /*   multigroup in reverse order.  */
 	                                      /*=================================*/
     for(Int32 twice=0;twice<2;twice++){
         if(twice == 0)
             listedGroup = ReverseList(singleGroup);
         else
             listedGroup = ReverseList(multiGroup);
         while (listedGroup != NULL && !retcode)
 	                                      /*=================================*/
 	                                      /*   All histograms are listed in  */
 	                                      /*   tableGroupList so it is   */
 	                                      /*   searched for each one.        */
 	                                      /*=================================*/
         {
             tableGroup = tableGroupList;
             while (tableGroup != NULL && !retcode)
             {
                 if(tableGroup->colSet == listedGroup->colSet && // order doesn't matter
                    tableGroup->reason != HS_REASON_EMPTY)
                 {
                     retcode = DisplayHistograms(displayData, space,
                         tableGroup->oldHistid, tableGroup->colNames->data());
                     gotOne = true;
                     break;
                 }
                 tableGroup = tableGroup->next;
             }
             listedGroup = listedGroup->prev;
         }
     }

     if(!gotOne && !retcode)
         displayData += "\nNo Histograms exist for the requested columns or groups\n";

     return retcode;
 }

 // Copied from cli/Cli.cpp.
 inline NABoolean isERROR(Lng32 retcode)
 {
   return (retcode < 0);
 }

 Lng32 HSGlobalsClass::DisplayHistograms(NAString& displayData, Space& space,
                                         const ULng32 oldHistId, const char* colnames)
 {
     Lng32 retcode = 0;
     HSErrorCatcher errorCatcher(retcode, - UERR_INTERNAL_ERROR, "DISPLAY HISTOGRAMS", TRUE);

     // HISTOGRAMS table columns:
     Lng32 colpos = 0;
     Lng32 colcount = 0;
     short intcount = 0;
     Int64 rowcount = 0;
     Int64 totaluec = 0;
     Int64 mc_rowcount = 0;
     Int64 mc_totaluec = 0;
     double stdv = 0;
     Int64 mfv = 0;
     Int64 mfv2 = 0;
     NAWchar* lowval_;
     NAWchar* highval_;
     NAWchar* mfval_;
     lowval_ =  new(STMTHEAP) NAWchar[HS_MAX_BOUNDARY_LEN + 1];
     highval_ =  new(STMTHEAP) NAWchar[HS_MAX_BOUNDARY_LEN + 1];
     mfval_ =  new(STMTHEAP) wchar_t[HS_MAX_BOUNDARY_LEN + 1];

     short intnumber = 0;   // Matches intcount type.

     NABoolean dispMFV = FALSE;
     if (CmpCommon::getDefault(USTAT_SHOW_MFV_INFO) == DF_ON)
       dispMFV = TRUE;

     ULng32 histID = oldHistId;      // Assign HISTOGRAM_ID for query.
     Int64 objID  = objDef->getObjectUID(); // Assign TABLE_UID for query.

     char numbuffer[2*HS_MAX_BOUNDARY_LEN];    //  used to write histogram lines to output.
     char numbuffer1[2*HS_MAX_BOUNDARY_LEN];    //  used to write histogram lines to output.
     char mfvbuffer[2*HS_MAX_BOUNDARY_LEN];    //  used to write mfv info  to output.
     char mfvbuffer1[2*HS_MAX_BOUNDARY_LEN];    //  used to write mfv info  to output.
     NAString colnamesStr(colnames);
     char *dummyFirstUntranslatedChar;
     unsigned int outputDataLen;

 #ifdef NA_USTAT_USE_STATIC  // use static query defined in module file
     HSCliStatement::statementIndex stmt;

     if (tableFormat == SQLMX)
        if (HSGlobalsClass::schemaVersion >= COM_VERS_2300)
          stmt = HSCliStatement::SHOWHIST_MX_2300;
        else
          stmt = HSCliStatement::SHOWHIST_MX;
     else
       stmt = HSCliStatement::SHOWHIST_MP;

     HSCliStatement histData( stmt,
                               (char *)hstogram_table->data(),
                               (char *)&objID,
                               (char *)&histID
                             );

 #else // NA_USTAT_USE_STATIC not defined, use dynamic query

     HS_ASSERT(schemaVersion >= COM_VERS_2300);
     char sbuf[25];
     sprintf(sbuf, PF64, objID);
     NAString qry = "SELECT COL_POSITION, COLCOUNT, INTERVAL_COUNT, ROWCOUNT, "
                           "TOTAL_UEC, LOW_VALUE, HIGH_VALUE "
                    "FROM ";
     qry.append(hstogram_table->data())
        .append(   " WHERE TABLE_UID = ")
        .append(sbuf);
     qry.append(   "   AND HISTOGRAM_ID = ");
     sprintf(sbuf, "%d", histID);
     qry.append(sbuf)
        .append(   "   AND REASON <> ' '")
        .append(   " ORDER BY TABLE_UID, HISTOGRAM_ID, COL_POSITION");

     HSCursor histData(STMTHEAP, "DISPLAY_HIST");
     retcode = histData.prepareQuery(qry.data(), 0, 7);
     HSHandleError(retcode);

 #endif // NA_USTAT_USE_STATIC not defined

     if (isERROR(retcode=histData.open())) {
         displayData += "\nThe requested histogram does not exist\n";
         NADELETEBASIC(lowval_, STMTHEAP);
         NADELETEBASIC(highval_, STMTHEAP);
         return retcode;
     }

     retcode = histData.fetch(7,
                        (char *)&colpos, (char *)&colcount,
                        (char *)&intcount, (char *)&rowcount,(char *)&totaluec,
                        (char *)&lowval_[0], (char *)&highval_[0]);
     if (isERROR(retcode)) {
         displayData += "\nUnable to fetch the histogram data for the table.\n";
         histData.close();
         return retcode;
     }

     NABoolean hideIntervalInfoForMCGroup = TRUE;
     if(singleGroup || (multiGroup && CmpCommon::getDefault(USTAT_SHOW_MC_INTERVAL_INFO) == DF_ON))
       hideIntervalInfoForMCGroup = FALSE;
     if(hideIntervalInfoForMCGroup)
     {
       mc_rowcount = rowcount;
       mc_totaluec = totaluec;
     }

     // Now format the data to the stream
     if (optFlags & DETAIL_OPT)
     {
         displayData += "\nHist ID:    " + Int64ToNAString(uint32ToInt64(histID)) +
                        "\nColumn(s):  " + colnamesStr                    +
                        "\nTotal Rows: " + Int64ToNAString(rowcount)      +
                        "\nTotal UEC:  " + Int64ToNAString(totaluec)      +
                        "\nLow Value:  ";
         char *dummyFirstUntranslatedChar;
         unsigned int outputDataLen;

         LocaleToUTF8(cnv_version1,
                      (const char *) &lowval_[1],
                      (short)lowval_[0],
                      numbuffer,
                      sizeof(numbuffer),
                      cnv_UTF16,
                      dummyFirstUntranslatedChar,
                      &outputDataLen,
                      TRUE);
         displayData += numbuffer;
         LocaleToUTF8(cnv_version1,
                      (const char *) &highval_[1],
                      (short)highval_[0],
                      numbuffer,
                      sizeof(numbuffer),
                      cnv_UTF16,
                      dummyFirstUntranslatedChar,
                      &outputDataLen,
                      TRUE);

 	    displayData += "\nHigh Value: ";
         displayData += numbuffer;
         displayData += "\nIntervals:  ";
 	    if(hideIntervalInfoForMCGroup)
 	      displayData += LongToNAString((Lng32)1);
 	    else
           displayData += LongToNAString((Lng32)intcount);
     }
     else
     {
         sprintf(numbuffer, "%10u %6d %11s %11s ", histID, intcount,
                 Int64ToNAString(rowcount).data(), Int64ToNAString(totaluec).data());
         displayData += numbuffer + colnamesStr;
     }

     // Flush the space after each histogram.  allocateAndCopyToAlignedSpace will encode the string
     // like varchar (with an embedded length of size short).  Call for each histogram in order to
     // avoid a buffer overflow in MXCI.  MUST BE CALLED ON LINE BOUNDARY.
     space.allocateAndCopyToAlignedSpace(displayData, displayData.length(), sizeof(short));
     displayData.resize(0);  // clear what was just copied.

     histData.close(); // finished writing information for this histogram.

     // Go ahead to write information for intervals of this histogram if DETAIL
     // option was specified, else return now.
     HSFilterWarning(retcode);  // clean up any warnings before possible return
     if (!(optFlags & DETAIL_OPT))
         return 0;

 #ifdef NA_USTAT_USE_STATIC  // use static query defined in module file
     if (tableFormat == SQLMX)
       if (HSGlobalsClass::schemaVersion >= COM_VERS_2300)
         stmt = HSCliStatement::SHOWINT_MX_2300;
       else
         stmt = HSCliStatement::SHOWINT_MX;
     else
         stmt = HSCliStatement::SHOWINT_MP;

     HSCliStatement intData( stmt,
         (char *)hsintval_table->data(),
         (char *)&objID,
         (char *)&histID);

 #else // NA_USTAT_USE_STATIC not defined, use dynamic query

     sprintf(sbuf, PF64, objID);
     qry =       "SELECT INTERVAL_ROWCOUNT, INTERVAL_NUMBER, INTERVAL_UEC, INTERVAL_BOUNDARY,"
                        "STD_DEV_OF_FREQ, V1, V2, V5"
                " FROM ";
     qry.append(hsintval_table->data())
        .append(" WHERE TABLE_UID = ")
        .append(sbuf)
        .append(  " AND HISTOGRAM_ID = ");
     sprintf(sbuf, "%d", histID);
     qry.append(sbuf);
     qry.append(" ORDER BY TABLE_UID, HISTOGRAM_ID, INTERVAL_NUMBER"
                " FOR READ UNCOMMITTED ACCESS");

     HSCursor intData(STMTHEAP, "DISPLAY_HISTINT");
     retcode = intData.prepareQuery(qry.data(), 0, 8);
     HSHandleError(retcode);

 #endif // NA_USTAT_USE_STATIC not defined

     if (isERROR(retcode=intData.open())) {
         displayData += "\nUnable to open the Intervals Table.\n";
         return retcode;
     }
     displayData +=   "\nNumber    Rowcount         UEC Boundary";
     if (dispMFV)
     {
       displayData +=   "                                     Stdev MFVRowCount 2ndMFVRowCount MFV";
     }
     displayData +=   "\n====== =========== =========== ======================================";

     if (dispMFV)
       displayData +=   " =========== =========== ============== ======================================";

     // Flush the data after interval header.
     space.allocateAndCopyToAlignedSpace(displayData, displayData.length(), sizeof(short));
     displayData.resize(0);  // clear what was just copied.

     for(short jj=0; jj<=intcount; jj++){
         retcode = intData.fetch(8,
             (void *)&rowcount,(void *)&intnumber,(void *)&totaluec,
             (void *)&highval_[0],(void *)&stdv, (void *)&mfv,(void *)&mfv2,
             (void *)&mfval_[0]);
         if(isERROR(retcode)) {
             displayData += "\nInterval Not Found in the Intervals Table.\n";
             intData.close();
             return retcode;
         }

 	    if(hideIntervalInfoForMCGroup)
 	    {
           if(jj == intcount)
           {
             rowcount = mc_rowcount;
             totaluec = mc_totaluec;
           }
           else if(jj != 0)
             continue;
         }

         // This will be a max of 5+1+21+1+21+1 = 50 chars.
 	    sprintf(numbuffer, "%6d %11s %11s ", (hideIntervalInfoForMCGroup && intnumber) ? 1 : intnumber,
                                               Int64ToNAString(rowcount).data(),
                                               Int64ToNAString(totaluec).data());

         displayData += numbuffer;

         LocaleToUTF8(cnv_version1,
                       (const char *) &highval_[1],
                       (short)highval_[0],
                       numbuffer1,
                       sizeof(numbuffer1),
                       cnv_UTF16,
                       dummyFirstUntranslatedChar,
                       &outputDataLen,
                       TRUE);
         sprintf(numbuffer, "%-38s", numbuffer1);

         displayData += numbuffer;

         if (dispMFV)
         {
           sprintf(mfvbuffer, "%11.2f ", stdv);
           displayData += mfvbuffer;
           sprintf(mfvbuffer, " %11s %14s ", Int64ToNAString(mfv).data(), Int64ToNAString(mfv2).data());
           displayData += mfvbuffer;
           LocaleToUTF8(cnv_version1,
                         (const char *) &mfval_[1],
                         (short)mfval_[0],
                         mfvbuffer1,
                         sizeof(mfvbuffer1),
                         cnv_UTF16,
                         dummyFirstUntranslatedChar,
                         &outputDataLen,
                         TRUE);
           sprintf(mfvbuffer, "%-38s", mfvbuffer1);
           displayData += mfvbuffer;
         }

         TrimNAStringSpace(displayData, false, true);

         // Flush the data after each interval line to avoid overflowing buffers in MXCI.
         space.allocateAndCopyToAlignedSpace(displayData, displayData.length(), sizeof(short));
         displayData.resize(0);  // clear what was just copied.
     }
     intData.close();
     displayData += "\n";

     HSFilterWarning(retcode);  // filter out any warnings so HSErrorCatcher doesn't act up

     return 0;
 }

 HSColGroupStruct* HSGlobalsClass::ReverseList(HSColGroupStruct* list)
 {
    HSColGroupStruct *saveGroup;

    if(list==NULL)
       return list;
    saveGroup = list;
    list = list->next;
    while(list != NULL){
        list->prev = saveGroup;
        saveGroup = list;
        list = list->next;
    }
    return saveGroup;
 }


 /***********************************************/
 /* METHOD:  print()                            */
 /* PURPOSE: print histogram information to log.*/
 /***********************************************/
 void HSColGroupStruct::print()
   {
     HSLogMan *LM = HSLogMan::Instance();

     if (this != NULL)
       {
         sprintf(LM->msg, "\t\t\t%d, %u, %u, (%s)", colCount, oldHistid, newHistid, colNames->data());
         LM->Log(LM->msg);
         if (this->next != NULL)
           this->next->print();
       }
   }


 // This function is called for each rowset that is read for internal sort.
 // The null indicators for the data just read are checked starting at the
 // front of the array, and when one is found to be set, the corresponding
 // value in the data array is overwritten with the first non-null value
 // found scanning backwards from the end of the data array. This compacts
 // the non-null values in the data array without excessive data movement.
 // Nulls are counted as they are found, so the null interval in the histogram
 // can be set up properly.
 //
 template <class T>
 void processNullsForColumn(HSColGroupStruct *group, Lng32 rowsRead, T* dummyPtr)
 {
   short *frontNull, *backNull;
   T     *frontData, *backData;

   // copy data for MC
   if ( HSGlobalsClass::performISForMC() && (group->mcs_usingme > 0) &&
       (group->ISdatatype != REC_BYTE_F_ASCII) &&
       (group->ISdatatype != REC_BYTE_F_DOUBLE) &&
       (group->ISdatatype != REC_BYTE_V_ASCII) &&
       (group->ISdatatype != REC_BYTE_V_DOUBLE)
      )

   {
       memcpy (group->mcis_nextData, group->nextData, rowsRead * sizeof(T));
       group->mcis_nextData = (T*)group->mcis_nextData + rowsRead;
   }

   if (!group->nullIndics)
     {
       group->nextData = (T*)group->nextData + rowsRead;
       group = group->next; //@ZX -- looks wrong; modifies by-value param right before return?
       return;
     }

   // set the MC bitmap null indicator
   if (group->mcs_usingme > 0)
   {
      Int32 start = ((T*)group->nextData - (T*)group->data) + group->nullCount;
      Int32 end = start + rowsRead;
      frontNull = group->nullIndics;
      for (Int32 idx= start; idx < end; idx++)
      {
         if (*frontNull == -1)
            group->mcis_nullIndBitMap->setBit(idx);
         frontNull++;
      }
   }

   frontData = (T*)group->nextData;
   backData  = frontData + rowsRead - 1;
   frontNull = group->nullIndics;
   backNull  = frontNull + rowsRead - 1;
   while (frontNull < backNull)
     {
       while (*frontNull != -1 && frontNull < backNull)
         {
           frontNull++;
           frontData++;
         }

       while (*backNull == -1 && frontNull < backNull)
         {
           backNull--;
           backData--;
           group->nullCount++;
         }

       if (frontNull < backNull)
         {
           *frontData++ = *backData--;
           frontNull++;
           backNull--;
           group->nullCount++;
         }
     }

   if (frontNull == backNull)
     {
       if (*frontNull == -1)
         group->nullCount++;
       else
         frontData++;
    }

   group->nextData = frontData;
 }

 // For each column currently being processed for internal sort (denoted by
 // state=PENDING, invoke the template function that removes and counts
 // nulls from the data array. A dummy null pointer cast to the appropriate
 // type is passed to the template function, so the right template
 // instantiation will be used.
 // In this method, for VARCHAR columns, we do something other than what this
 // method is supposed to do. Probably this new code should be moved out of this
 // method to a separate new method in future.
 // For varchar columns, we compute average column data size and store it in
 // group->avgVarCharSize member.
 //
 // Return code: 0 on success, -1 on failure.
 //
 Lng32 HSGlobalsClass::processInternalSortNulls(Lng32 rowsRead, HSColGroupStruct *group)
 {
   HSLogMan *LM = HSLogMan::Instance();
   ISFixedChar *chPtr;
   ISVarChar *vchPtr;
   ULng32 sumSize = 0;
   NABoolean computeVarCharSize = FALSE;
   NABoolean maxLongLimit = FALSE;
   short *nullInd = NULL;
   Int32 vcInflatedLen, vcCompactLen;
   char* inflatedDataPtr;
   char *dataPtr, errtxt[100]={0};
   Int32 i;
   Lng32 retcode=0;
   HSErrorCatcher errorCatcher(retcode, - UERR_INTERNAL_ERROR, errtxt, TRUE);

   while (group)
     {
       if (group->state != PENDING && group->state != OVERRAN)
         {
           group = group->next;
           continue;
         }

       // keep track of total number of rows read - used by MC IS
       group->mcis_rowsRead += rowsRead;

       switch (group->ISdatatype)
         {
           case REC_BIN8_SIGNED:
             processNullsForColumn(group, rowsRead, (Int8*)NULL);
             break;

           case REC_BOOLEAN:
           case REC_BIN8_UNSIGNED:
             processNullsForColumn(group, rowsRead, (UInt8*)NULL);
             break;

           case REC_BIN16_SIGNED:
             processNullsForColumn(group, rowsRead, (short*)NULL);
             break;

           case REC_BIN16_UNSIGNED:
             processNullsForColumn(group, rowsRead, (unsigned short*)NULL);
             break;

           case REC_BIN32_SIGNED:
             processNullsForColumn(group, rowsRead, (Int32*)NULL);
             break;

           case REC_BIN32_UNSIGNED:
             processNullsForColumn(group, rowsRead, (UInt32*)NULL);
             break;

           case REC_BIN64_SIGNED:
             processNullsForColumn(group, rowsRead, (Int64*)NULL);
             break;

           case REC_BIN64_UNSIGNED:
             processNullsForColumn(group, rowsRead, (UInt64*)NULL);
             break;

           case REC_IEEE_FLOAT32:
             processNullsForColumn(group, rowsRead, (float*)NULL);
             break;

           case REC_IEEE_FLOAT64:
             processNullsForColumn(group, rowsRead, (double*)NULL);
             break;

           case REC_BYTE_F_ASCII:
           case REC_BYTE_F_DOUBLE:
             // Set up elements of data array, which are pointers to char values.
             chPtr = (ISFixedChar*)group->nextData;
             dataPtr = (char*)group->strNextData;
             for (i=0; i<rowsRead; i++)
               {
                 chPtr->setContent(dataPtr);
                 dataPtr += group->ISlength;
                 chPtr++;
               }
             group->strNextData = dataPtr;  // not affected by null processing
             ISFixedChar::setLength(group->ISlength);
             processNullsForColumn(group, rowsRead, (ISFixedChar*)NULL);
             break;

           case REC_BYTE_V_ASCII:
           case REC_BYTE_V_DOUBLE:
             {
               // Set up elements of data array, which are pointers to varchar
               // values (2-byte length field followed by string). The length
               // we advance the ptr by includes an extra byte for alignment if
               // the varchar length is odd.
               // We also compute average varchar data size here which is not
               // something processInternalSortNulls() method should be doing.
               // This new code (computing avg size) will be moved to a separate
               // new method in the future.
               vcInflatedLen = group->inflatedVarcharContentSize();
               vchPtr = (ISVarChar*)group->nextData;
               dataPtr = (char*)group->strNextData;
               inflatedDataPtr = (char*)group->varcharFetchBuffer;
               nullInd = group->nullIndics;
               NABoolean compacted = group->isCompacted();
               Int64 nulls = 0;  // Number of nulls in this batch of values
               for (i=0; i<rowsRead; i++)
                 {
                   if (nullInd && *nullInd == -1)
                     {
                       nulls++;
                       if (compacted)
                         inflatedDataPtr += vcInflatedLen;
                       else
                         dataPtr += vcInflatedLen;
                     }
                   else
                   {
                     if (compacted)
                       {
                         vcCompactLen = HSColGroupStruct::varcharContentSize(*(Int16*)inflatedDataPtr);
                         group->sumSize += vcCompactLen;
                         if (group->state == PENDING)  // that is, not OVERRAN
                           {

                             if (dataPtr + vcCompactLen > (char *)group->strData + group->strMemAllocated)
                               {
                                 // We underestimated the space needed for compacted varchars,
                                 // so don't save anymore. We'll continue to compute the actual
                                 // average varchar length though.
                                 group->state = OVERRAN;
                                 if (LM->LogNeeded())
                                   {
                                     sprintf(LM->msg, "Exhausted varchar compaction memory for column %s", group->colNames->data());
                                     LM->Log(LM->msg);
                                   }
                               }
                             else
                               {
                                 memcpy(dataPtr, inflatedDataPtr, (ULng32)vcCompactLen);
                                 inflatedDataPtr += vcInflatedLen;
                                 vchPtr->setContent(dataPtr);
                                 dataPtr += vcCompactLen;
                               }
                           }
                       }
                     else
                       {
                         vchPtr->setContent(dataPtr);
                         dataPtr += vcInflatedLen;
                         group->sumSize += vchPtr->getLength();
                       }

                     vchPtr++;
                   }

                   if (nullInd)
                     nullInd++;
                 }

               group->nullCount += nulls;
               group->rowsRead += rowsRead;
               // compute average varchar size from running rows count and running sum of varchar sizes
               if (group->rowsRead > 0)
                 group->avgVarCharSize = (double)group->sumSize / (double)group->rowsRead;
               group->nextData = vchPtr;
               group->strNextData = dataPtr;
             }
             break;

           default:
             sprintf(errtxt, "processInternalSortNulls(): unknown type %d",
                             group->ISdatatype);
             sprintf(LM->msg, "INTERNAL ERROR: %s", errtxt);
             LM->Log(LM->msg);
             retcode=-1;
             HSHandleError(retcode);
             break;
         }

       group = group->next;
     }
     return retcode;
 }

 // Return the maximum amount of memory we want to allocate for internal sort
 // at any given time; or -1 if an error occurred
 Int64 HSGlobalsClass::getMaxMemory()
 {
     // the NAHeap will never exceed around 1.3GB.
     Int64       mem;
     HSLogMan   *LM = HSLogMan::Instance();

     mem = Int64(sysconf(_SC_AVPHYS_PAGES)) * Int64(sysconf(_SC_PAGE_SIZE));

     if (LM->LogNeeded())
       {
         LM->Log("Entering getMaxMemory()");
         sprintf(LM->msg, "The amount of physical memory currently available is %ld", mem);
         LM->Log(LM->msg);

         sprintf(LM->msg, "Will use up to %.7f percent of the min of the available physical memory, and CQD USTAT_NAHEAP_ESTIMATED_MAX",
                          ISMemPercentage_ * 100);
         LM->Log(LM->msg);
       }

   Int64 NAHEAP_ESTIMATED_MAX = (Int64)
                 (CmpCommon::getDefaultNumeric(USTAT_NAHEAP_ESTIMATED_MAX) *
                 1024 * 1024 * 1024);

   // Limit the amount of assumed available memory by the estimated max heap size.
   if (mem > NAHEAP_ESTIMATED_MAX)
     mem = NAHEAP_ESTIMATED_MAX;

   // Restrict available memory to the percentage we want to use.
   mem = (Int64)(mem * ISMemPercentage_);

   if (LM->LogNeeded())
     {
       // Use function to convert mem to string -- can't use sprintf with %I64d
       // because %I64d doesn't work as a format specifier on NSK.
       strcpy(LM->msg, "getMaxMemory: returned ");
       formatFixedNumeric(mem, 0, LM->msg + strlen(LM->msg));
       LM->Log(LM->msg);
     }

   return mem;
 }

 // Indicates whether the column can be handled by internal sort.
 // Internal sort can handle all current international char sets supported by
 // SQL/MX.  There are two encodings for char sets, UCS2 and ISO88591, both
 // of these are sorted in a binary manner (there is no special handling) and
 // no embedded NULLs (2/26/08).
 //
 bool isInternalSortType(HSColumnStruct &col)
 {
   HSLogMan *LM = HSLogMan::Instance();

   switch (col.datatype)
     {
       case REC_BOOLEAN:
       case REC_BIN8_SIGNED:
       case REC_BIN8_UNSIGNED:
       case REC_BIN16_SIGNED:
       case REC_BIN16_UNSIGNED:
       case REC_BIN32_SIGNED:
       case REC_BIN32_UNSIGNED:
       case REC_BIN64_SIGNED:
       case REC_BIN64_UNSIGNED:
       case REC_DECIMAL_LSE:
       case REC_DECIMAL_UNSIGNED:
       case REC_DECIMAL_LS:
       case REC_IEEE_FLOAT32:
       case REC_IEEE_FLOAT64:
       case REC_BYTE_F_ASCII:
       case REC_BYTE_V_ASCII:
         return true;

       case REC_BYTE_F_DOUBLE:
       case REC_BYTE_V_DOUBLE:
         if (col.charset == CharInfo::UNICODE)
           {
             NAString MS1v = ActiveSchemaDB()->getDefaults().getValue(MODE_SPECIAL_1);
             if ( MS1v == "ON" )
               {
                  return false; //In these modes, Internal Sort won't work on UCS2 columns.
               }
           }
         return true;

       case REC_DATETIME:
         switch (col.precision)
           {
             case REC_DTCODE_DATE:
             case REC_DTCODE_TIME:
             case REC_DTCODE_TIMESTAMP:
               return true;
             default:
               LM->Log("INTERNAL ERROR (isInternalSortType):");
               sprintf(LM->msg, "Undefined datetime precision type %d", col.precision);
               LM->Log(LM->msg);
               *CmpCommon::diags() << DgSqlCode(-UERR_GENERIC_ERROR)
                                   << DgString0("isInternalSortType()")
                                   << DgString1("N/A")
                                   << DgString2(LM->msg);
               throw CmpInternalException("failure in isInternalSortType()",
                                          __FILE__, __LINE__);
           }

       case REC_INT_YEAR:
       case REC_INT_MONTH:
       case REC_INT_YEAR_MONTH:
       case REC_INT_DAY:
       case REC_INT_HOUR:
       case REC_INT_DAY_HOUR:
       case REC_INT_MINUTE:
       case REC_INT_HOUR_MINUTE:
       case REC_INT_DAY_MINUTE:
       case REC_INT_SECOND:
       case REC_INT_MINUTE_SECOND:
       case REC_INT_HOUR_SECOND:
       case REC_INT_DAY_SECOND:
         return true;

       default:
         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "Type %d not handled by internal sort: column %s",
                              col.datatype,
                              col.colname->data());
             LM->Log(LM->msg);
           }
         return false;
     }
 }

 // Determines whether internal sort is the most efficient method for the column,
 // based on type and percentage of values that are distinct (using information
 // from the column's existing histogram). If there is no existing histogram for
 // the column, the values used default to 0, and internal sort will not be used.
 //
 NABoolean isInternalSortEfficient(Int64 rows, HSColGroupStruct *group)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Lng32 dataType = group->ISdatatype;
   NABoolean returnVal;
   double uecRate;
   double uecRateMinForIS = 0;

   if (group->prevRowCount == 0)
     uecRate = 0.0;
   else
     uecRate = group->prevUEC / (double)group->prevRowCount;

   // always use IS when there is no existing histogram for the column
   if ((uecRate == 0) && CmpCommon::getDefault(USTAT_USE_IS_WHEN_NO_STATS) == DF_ON)
     {
        returnVal = TRUE;
     }
   else if ((group->mcs_usingme > 0) &&
            (CmpCommon::getDefault(USTAT_IS_IGNORE_UEC_FOR_MC) == DF_ON))
     {
        // if this column is used by MC and MCIS is ON
        // then compute this column using IS regardless of UEC
        returnVal = TRUE;
     }
   else if ((dataType >= REC_MIN_BINARY &&
        dataType <= REC_MAX_BINARY)||
        dataType == REC_DECIMAL_LSE ||
        dataType == REC_DECIMAL_UNSIGNED ||
        dataType == REC_DECIMAL_LS)
     {
       // For integral types, number of distinct values must be at least
       // USTAT_MIN_DEC_BIN_UEC_FOR_IS of total (default 3%).
       uecRateMinForIS = CmpCommon::getDefaultNumeric(USTAT_MIN_DEC_BIN_UEC_FOR_IS);
       returnVal = (uecRate >= uecRateMinForIS);
     }
   else if (DFS2REC::isAnyCharacter(dataType))
     {
       // For char types, if the total amount of data (rows * length) is less than
       // USTAT_MAX_CHAR_DATASIZE_FOR_IS (default to 1000 MB), use IS. Otherwise
       // the number of distinct values must be at least
       // USTAT_MIN_CHAR_UEC_FOR_IS of total (default 20%).
       if ( rows * group->ISlength < 1024*1024*CmpCommon::getDefaultNumeric(USTAT_MAX_CHAR_DATASIZE_FOR_IS) )
         returnVal = TRUE;
       else {
         uecRateMinForIS = CmpCommon::getDefaultNumeric(USTAT_MIN_CHAR_UEC_FOR_IS);
         returnVal = (uecRate >= uecRateMinForIS);
       }
     }
   else
     returnVal = TRUE;  // No threshold established yet for other types; use IS

   if (LM->LogNeeded())
     {
       if ((group->mcs_usingme > 0) && (CmpCommon::getDefault(USTAT_IS_IGNORE_UEC_FOR_MC) == DF_ON))
         sprintf(LM->msg, "MCIS is ON and column used by MC, skipping uec check for %s; internal sort will be used",
                          group->colSet[0].colname->data());
       else if (uecRate == 0.0)
         sprintf(LM->msg, "No existing histogram for %s; internal sort will not be used",
                          group->colSet[0].colname->data());
       else if (returnVal)
         sprintf(LM->msg, "%d%% of values for column %s are distinct; internal sort will be used",
                          (Int32)(uecRate * 100 + .5),
                          group->colSet[0].colname->data());
       else
         sprintf(LM->msg, "Only %d%% of values for column %s are distinct; min UEC to use IS is %d%%; "
                          "internal sort will NOT be used",
                          (Int32)(uecRate * 100 + .5),
                          group->colSet[0].colname->data(),
                          (Int32)(uecRateMinForIS * 100 + .5));
       LM->Log(LM->msg);
     }

   return returnVal;
 }

 NABoolean HSGlobalsClass::allGroupsFitInMemory(Int64 rows)
 {
   Int64 memLeft = getMaxMemory();

   // account for at least one multi-column memory if MC IS is used
    HSColGroupStruct *mgroup = multiGroup;
    while (mgroup && memLeft > 0)
    {
       memLeft -= mgroup->memNeeded;
       mgroup = mgroup->next;
    }

   Int32 count = 0;
   HSColGroupStruct *group = singleGroup;
   while (group && memLeft > 0)
     {
       if (group->memNeeded > 0 &&        // was set to 0 if exceeds address space
           group->memNeeded < memLeft &&
           isInternalSortType(group->colSet[0]) &&
           isInternalSortEfficient(rows, group))
         {
           count++;
           memLeft -= group->memNeeded;
         }
       group = group->next;
     }

   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
     {
       sprintf(LM->msg,
          "HSGlobalsClass::allGroupsFitInMemory(): count of single groups that fit =%d, total single count=%d", count, singleGroupCount);
       LM->Log(LM->msg);
     }

   return count == singleGroupCount;
 }

 // Determines a set of columns to process with internal sort, based on
 // available memory. selectSortBatch() is called in a loop, which will
 // typically be executed only once. However, when the call to
 // allocateMemoryForColumns() returns 0, meaning that memory could not be
 // allocated for any columns in the selected batch, we loop again, the
 // memory allocation routine having adjusted things to be more conservative
 // in the amount of memory we request.
 //
 // Parameters:
 //   rows - Number of rows we are allocating for. Needed for deleting arrays
 //          if we can't allocate all memory needed.
 //   internalSortWhenBetter - If TRUE, choose internal sort or standard sort
 //          based on expected performance.
 //   trySampleTableBypass - If TRUE, read directly into memory instead of sample
 //          table if possible.
 //
 // Return value:
 //   The number of columns to process in the next batch.
 //
 Int32 HSGlobalsClass::getColsToProcess(Int64 rows,
                                      NABoolean internalSortWhenBetter,
                                      NABoolean trySampleTableBypass)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Int32 numColsSelected, numColsToProcess;

   do
     {
       numColsSelected = selectSortBatch(rows, internalSortWhenBetter,
                                         trySampleTableBypass);
       if (numColsSelected > 0)
         numColsToProcess = allocateMemoryForInternalSortColumns(rows);
       else
         numColsToProcess = 0;
     }
   while (numColsSelected > 0 && numColsToProcess == 0);

   // If we had to throw some columns back, log the final list of ones to be
   // processed in this batch.
   if (LM->LogNeeded() && numColsSelected > numColsToProcess)
     {
       LM->Log("Columns retained by getColsToProcess:");
       HSColGroupStruct *group = singleGroup;
       while (group != NULL)
         {
           if (group->state == PENDING)
             {
               sprintf(LM->msg, "    %s (" PFSZ " bytes)",
                                group->colSet[0].colname->data(),
                                group->memNeeded);
               LM->Log(LM->msg);
             }
           group = group->next;
         }
     }

   return numColsToProcess;
 }

 // If we decide to create and load a sample table, deallocate column memory
 // and reset PENDING group states back to UNPROCESSED before creating and
 // loading the sample table. We'll call getColsToProcess to reallocate it
 // again afterwards.
 void HSGlobalsClass::deallocatePendingMemory(void)
 {
   for (HSColGroupStruct *group = singleGroup; group; group = group->next)
     {
       if (group->state == PENDING)
         {
           group->freeISMemory(TRUE,TRUE);
           group->state = UNPROCESSED;
         }
     }
 }

 // Select a set of columns for internal sort based on the amount of memory req'd, type,
 // and whether the column has already been processed. If ISonlyWhenBetter is true,
 // data from the existing histogram is consulted to see if the column is expected
 // to perform well with internal sort. However, if trySampleInMemory is true, we
 // select all columns if they will all fit in memory at once, and only consider
 // expected individual column performance if this can't be done.
 //
 Int32 HSGlobalsClass::selectSortBatch(Int64 rows,
                                       NABoolean ISonlyWhenBetter,
                                       NABoolean trySampleInMemory)
 {
   HSLogMan *LM = HSLogMan::Instance();

   HSColGroupStruct *group = singleGroup;
   Int32 count = 0;
   Int64 memAllowed = getMaxMemory();
   Int64 memLeft = memAllowed;
   Int64 mcMemUsed = 0;

   // account for at least one multi-column memory if MC IS is used
   if ( performISForMC() )
   {
      HSColGroupStruct *mgroup = multiGroup;
      while (mgroup)
      {
         if (mgroup->state == UNPROCESSED)
         {
            mcMemUsed = mgroup->memNeeded;
            memLeft -= mcMemUsed;
            break;
         }
         mgroup = mgroup->next;
      }
   }

   // Visit all unprocessed items looking for ones that fit. No early loop exit if memLeft
   // is 0, because it is very unlikely to hit zero exactly. If trySampleInMemory
   // is set, we ignore whether columns have uec too low to perform with internal
   // sort. If all column samples won't fit in memory, we do it over and take that
   // into account the second time.
   while (group != NULL)
     {
       if (group->state == OVERRAN)
         {
           group->state = UNPROCESSED;

           group->freeISMemory(TRUE,TRUE);  // free old memory

           // recalculate group->memNeeded based on what we now know about
           // average varchar size

           group->oldAvgVarCharSize = group->avgVarCharSize;
           group->avgVarCharSize = -1;  // to force a new computation
           group->setISlength(group->ISlength,maxCharColumnLengthInBytes);

           Int64 rows;
           if (sampleRowCount > 0)
             rows = sampleRowCount;
           else
             rows = actualRowCount;

           getMemoryRequirementsForOneGroup(group,rows);
         }

       if (group->state == UNPROCESSED &&
           group->memNeeded > 0 &&        // was set to 0 if exceeds address space
           group->memNeeded < memLeft &&
           isInternalSortType(group->colSet[0]) &&
           (trySampleInMemory || !ISonlyWhenBetter ||
            isInternalSortEfficient(rows, group)))  //@ZXuec
         {
           group->state = PENDING;
           count++;
           memLeft -= group->memNeeded;
           group = group->next;
         }
       else if (trySampleInMemory)
         {
           trySampleInMemory = false;
           if (LM->LogNeeded())
             LM->Log("Internal sort: could not fit entire sample in memory");
           if (ISonlyWhenBetter)
             {
               // Can't fit entire sample in memory and USTAT_INTERNAL_SORT was set
               // to HYBRID; reset everything and try again, choosing only columns
               // expected to perform well with internal sort.
               count = 0;
               memLeft = memAllowed;
               group = singleGroup;
               while (group != NULL)
                 {
                   if (group->state == PENDING)
                     group->state = UNPROCESSED;
                   group = group->next;
                 }
               group = singleGroup;  // restart at beginning
             }
           else
             group = group->next;
         }
       else
         group = group->next;
     }

   if (LM->LogNeeded())
     {
       LM->Log("Columns selected by selectSortBatch:");
       group = singleGroup;
       while (group != NULL)
       {
           if (group->state == PENDING)
             {
               sprintf(LM->msg, "    %s (" PFSZ " bytes)",
                                group->colSet[0].colname->data(),
                                group->memNeeded);
               LM->Log(LM->msg);
             }
           group = group->next;
       }

       if (performISForMC() && (mcMemUsed > 0))
       {
          sprintf(LM->msg, "    multi-column groups (" PFSZ " bytes)", multiGroup->memNeeded);
          LM->Log(LM->msg);
       }
     }


   return count;
 }

 // Reduce the percentage of physical memory to limit internal sort to, following
 // an allocation failure that proved our previous estimate too high. We arbitrarily
 // reduce it to 90% of what it was. We could get smarter about this, and base the
 // reduction on how much of what we recommended was successfully allocated before
 // the failure.
 void HSGlobalsClass::memReduceAllowance()
 {
   HSLogMan *LM = HSLogMan::Instance();

   ISMemPercentage_ *= .9f;

   if (LM->LogNeeded())
     {
       sprintf(LM->msg, "Reducing ISMemPercentage_ to %f", ISMemPercentage_);
       LM->Log(LM->msg);
     }
 }

 // Takes corrective action when a memory allocation for internal sort could not
 // be made. Remove the offending column and remaining unallocated columns from
 // the current internal sort batch.
 //
 // Parameters:
 //   failedGroup - The single-col group which could not be allocated for.
 //   firstFailed - TRUE if the allocation failure was on the first column of the batch.
 //   rows - Number of rows the allocation is based on. Used for deleting object
 //          arrays.
 //
 void HSGlobalsClass::memRecover(HSColGroupStruct* failedGroup,
                                 NABoolean firstFailed,
                                 Int64 rows,
                                 HSColGroupStruct* mgr)
 {
   HSLogMan *LM = HSLogMan::Instance();

   if (LM->LogNeeded())
     {
       LM->Log("<<<Recovering from failed memory allocation for internal sort");
       sprintf(LM->msg, "Memory allocation failed for %s (" PF64 " rows)",
                        failedGroup->colSet[0].colname->data(), rows);
       LM->Log(LM->msg);
     }

   // Reset this and all subsequent pending columns -- no memory for them.
   // If the allocation failure was on the first column attempted, mark it and
   // other columns of equal or greater size as DONT_TRY. While it is only
   // necessary to give up on the first one to ensure that we don't get "stuck"
   // trying the same thing over and over, we shouldn't be bumping up so close
   // to the memory limit, so we avoid trying anything that big again.
   HSColGroupStruct* grp = failedGroup;
   do
     {
       if (firstFailed &&
           grp->memNeeded >= failedGroup->memNeeded &&
           (grp->state == PENDING || grp->state == UNPROCESSED))
         {
           grp->state = DONT_TRY;

           if (LM->LogNeeded())
             {
               sprintf(LM->msg, "Setting column to DONT_TRY: name=%s, memNeeded=" PFSZ,
                                 grp->colSet[0].colname->data(),
                                 grp->memNeeded);
               LM->Log(LM->msg);
             }

           // if MC in memory is enabled, we should also mark all MCs using this
           // column as DONT_TRY since we won't be able to compute them in memory
           if (performISForMC())
           {
               HSColGroupStruct* mgroup = mgr;

               while (mgroup != NULL)
               {
                   if (mgroup->state != DONT_TRY)
                   {
                      HSColGroupStruct *sgroup;
                      HSColumnStruct* col;
                      for (Int32 i=0; i<mgroup->colCount; i++)
                      {
                         if (grp->colSet[0].colnum == mgroup->colSet[i].colnum)
                         {
                            mgroup->state = DONT_TRY;
                            if (LM->LogNeeded())
                            {
                                sprintf(LM->msg, "MC: Setting MC to DONT_TRY: columns=(%s)",
                                                  mgroup->colSet[0].colname->data());
                                LM->Log(LM->msg);
                            }
                            break;
                         }
                      }
                   }
                   mgroup = mgroup->next;
               }
           }
         }
       else if (grp->state == PENDING)
         {
           grp->state = UNPROCESSED;
           if (LM->LogNeeded())
             {
               sprintf(LM->msg, "Setting column to UNPROCESSED: name=%s, memNeeded=" PFSZ,
                                 grp->colSet[0].colname->data(),
                                 grp->memNeeded);
               LM->Log(LM->msg);
             }
         }
     }
   while (grp = grp->next);

   if (LM->LogNeeded())
     LM->Log(">>>Finished recovery from failed memory allocation for internal sort");
 }

 // The data read into memory for IUS consists not only of the primary data
 // (from the existing persistent sample), but also the data for the rows to
 // be removed from the sample, and those to be inserted into the sample. The
 // allocation of memory for these three sets of data for a column must be
 // kept consistent, such that on a given pass, the three data sets for a
 // column should either all be in memory, or none of them.
 //
 // This function attempts to allocate the needed memory for all three data
 // sets for a given column selected for an IUS batch. If an allocation failure
 // occurs for one of the data sets, it ensures that any prior allocation for
 // a corresponding data set is undone, and that the state of corresponding
 // groups are all the same (typically removing the column from the current
 // batch by changing its state from PENDING to UNPROCESSED).
 Int32 HSGlobalsClass::allocateMemoryForIUSColumns(HSColGroupStruct* group,
                                                   Int64 rows,
                                                   HSColGroupStruct* delGroup,
                                                   Int64 delRows,
                                                   HSColGroupStruct* insGroup,
                                                   Int64 insRows)
 {
   HSLogMan *LM = HSLogMan::Instance();
   if (LM->LogNeeded())
     LM->StartTimer("Allocate storage for IUS columns");

   Int32 numCols = 0;
   HSColGroupStruct* firstPendingGroup = NULL;

   // To simplify the logic of keeping the three groups in sync, place them
   // and their row counts in arrays. On each iteration the array elements
   // are updated to point to the next group in the respective list.
   HSColGroupStruct* groupArr[] = {group, delGroup, insGroup};
   Int64 rowsArr[] = {rows, delRows, insRows};

   NABoolean gotMemory = TRUE;

   // Create storage for query results.
   do
   {
     if (groupArr[0]->state != PENDING)
       {
     	// Skip column not selected for this batch by advancing all 3 groups.
         for (Int16 i=0; i<3; i++)
           groupArr[i] = groupArr[i]->next;
         continue;
       }

     if (!firstPendingGroup)
       firstPendingGroup = groupArr[0];

     // Allocate all memory needed for storing values of each group. If unable
     // to do so for all groups, make necessary adjustments to group states and
     // set flag indicating memory shortfall.
     for (Int16 i=0; i<3 && gotMemory; i++)
       {
         if (!groupArr[i]->allocateISMemory(rowsArr[i]))
           {
         	// Recover from failed allocation (free any partial allocation and
         	// reset the group's state to UNPROCESSED). Also do this for any
         	// groups already allocated, e.g., if the allocation fails for
         	// delGroup, back out the allocation for the primary group for the
         	// column in question.
             Int16 j;
             HSColGroupStruct *grpi, *grpj;
             for (j=i; j>=0; j--)
               {
                 memRecover(groupArr[j], groupArr[0] == firstPendingGroup, rowsArr[j], NULL);
                 if (j < i)                     // free memory for groups in this set
                   groupArr[j]->freeISMemory(); //   that were already allocated
               }

             // For groups not allocated yet, don't need to free anything, but
             // make sure the states of corresponding groups are the same.
             // memRecover() will have changed the PENDING state of some of the
             // columns, and if the corresponding delGroup and/or insGroup are
             // not changed, they will be part of the queries to read the sample
             // decrement/increment, for columns that are not being processed in
             // this batch.
             for (j=i+1; j<3; j++)
               {
                 grpi = groupArr[i];
                 grpj = groupArr[j];
                 while (grpi)
                 {
                   grpj->state = grpi->state;
                   grpi = grpi->next;
                   grpj = grpj->next;
                 }
               }
             gotMemory = FALSE;
             memReduceAllowance();
           }
         else
           {
         	// Allocation was successful.
             groupArr[i]->nextData = groupArr[i]->data;
             groupArr[i]->mcis_nextData = groupArr[i]->mcis_data;
           }
       }

     // If the allocation was successful, increment the count of columns for
     // which memory was allocated, and advance to the next element in each
     // sequence of groups (primary, delete, and insert). If the allocation
     // was not successful, the loop will exit (recovery from the allocation
     // failure has been performed within the loop).
     if (gotMemory)
       {
         for (Int16 i=0; i<3; i++)
           groupArr[i] = groupArr[i]->next;
         numCols++;
       }

   } while (gotMemory && groupArr[0]);

   if (LM->LogNeeded())
     LM->StopTimer();

   return numCols;
 }

 // Allocates memory needed for internal sort for all columns marked as PENDING.
 //
 // Parameters:
 //   rows - Number of rows to base allocation on.
 //
 // Return value:
 //   Number of columns memory was successfully allocated for.
 //
 Int32 HSGlobalsClass::allocateMemoryForColumns(HSColGroupStruct* group,
                                                Int64 rows,
                                                HSColGroupStruct* mgr)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Int32 numCols = 0;
   HSColGroupStruct *firstPendingGroup = NULL;

   if (LM->LogNeeded())
     LM->StartTimer("Allocate storage for columns");

   // Create storage for query results.
   do
   {
      if (group->state != PENDING)
        continue;

      if (!firstPendingGroup)
        firstPendingGroup = group;

      // Allocate all memory needed for internal sort of the column. If unable,
      // to do so, make necessary adjustments to group states and bail out.
      if (!group->allocateISMemory(rows))
        {
          memRecover(group, group == firstPendingGroup, rows, mgr);
          memReduceAllowance();
          break;
        }
      //trafodion-2978
      //group->mcis_memFreed may be set TRUE in HSColGroupStruct::freeISMemory
      //so if allocate memory success,set group->mcis_memFreed to FALSE agin.
      if(group->mcis_memFreed)
          group->mcis_memFreed = FALSE;
      //trafodion-2978
      group->nextData = group->data;
      group->mcis_nextData = group->mcis_data;
      numCols++;
   } while (group = group->next);

   if (LM->LogNeeded())
     LM->StopTimer();

   return numCols;
 }

 Int32 HSGlobalsClass::allocateMemoryForInternalSortColumns(Int64 rows)
 {
   return allocateMemoryForColumns(singleGroup, rows, multiGroup);
 }

 Lng32 HSGlobalsClass::prepareToReadColumnsIntoMem(HSCursor *cursor, Int64 rows)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Lng32 retcode = 0;
   HSColGroupStruct *group = singleGroup;
   NAString internalSortQuery;

   HSErrorCatcher errorCatcher(retcode, - UERR_INTERNAL_ERROR,
                               "PREPARE_TO_READ_COLS_INTO_MEM", TRUE);

   // Create query to get data for the desired columns.
   internalSortQuery = "SELECT ";
   bool firstExpn = true;
   Int32 ct = 0;
   do
     {
       if (group->state == PENDING)
         {
           group->rowsRead = 0;
           group->sumSize = 0;

           if (firstExpn)
             firstExpn = false;
           else
             internalSortQuery.append(", ");
           internalSortQuery.append(group->ISSelectExpn);

           ct++;
         }
     }
   while (group = group->next);

   if ( ct == 0 ) return 0;

   internalSortQuery.append(" FROM ");
   // hssample_table->data() will be the real table name if a sample table is not used.
   internalSortQuery.append(hssample_table->data());

   Int64 hintRowCount = 0;
   if (sampleTableUsed)
   {
      hintRowCount = sampleRowCount;
   }
   else
   {
      hintRowCount = actualRowCount;
   }

   char cardHint[50];
   sprintf(cardHint, " <<+ cardinality %e >> ", (double)hintRowCount);
   internalSortQuery.append(cardHint);

   if (samplingUsed && !sampleTableUsed)
      internalSortQuery.append(sampleOption->data());

   internalSortQuery.append(" FOR READ UNCOMMITTED ACCESS");

   LM->Log("Preparing rowset...");
   // Allocate descriptors and statements for CLI and prepare rowset by
   // assigning location for results to be written.
   // prepareRowset may do retries
   retcode = cursor->prepareRowset(internalSortQuery.data(), FALSE, singleGroup,
        (Lng32)MINOF(MAX_ROWSET, rows));
   if (retcode < 0) HSHandleError(retcode) else retcode=0; // Set to 0 for warnings.
   LM->Log("...rowset prepared");

   return retcode;
 }


 /***********************************************/
 /* METHOD: readColumnsIntoMem()                */
 /* PURPOSE: reads a set of columns from a      */
 /*          table into memory so they can be   */
 /*          sorted internally.                 */
 /* PARAMETERS:                                 */
 /*   cursor -- Cursor to use to read rows from */
 /*             the table (may be a temporary   */
 /*             sample table).                  */
 /*   rows -- Maximum number of rows to read.   */
 /*           Memory for the column values has  */
 /*           been allocated based on this, so  */
 /*           don't exceed it.                  */
 /* RETURN CODE: 0 on success.                  */
 /*              -1 on failure.                 */
 /***********************************************/
 Lng32 HSGlobalsClass::readColumnsIntoMem(HSCursor *cursor, Int64 rows)
 {
   HSLogMan *LM = HSLogMan::Instance();
   Lng32 retcode = 0;
   Int64 rowsLeft = rows;

   HSErrorCatcher errorCatcher(retcode, - UERR_INTERNAL_ERROR,
                               "READ_COLS_INTO_MEM", TRUE);

   prepareToReadColumnsIntoMem(cursor, rows);

   LM->Log("fetching rowsets...");
   if (LM->LogNeeded())
     LM->StartTimer("Fetching rowsets for internal sort");
   sampleRowCount = 0;
   while (retcode >= 0          // allow warnings
          && retcode != HS_EOF  // exit if no more data
          && rowsLeft > 0)      // internal CLI error if 0 used for # rows to read
     {
       retcode = cursor->fetchRowset();
       if (retcode == 0)  // 1 or more rows successfully read
         {
           sampleRowCount += cursor->rowsetSize();
           rowsLeft = rows - sampleRowCount;
           // We also compute average data size for VarChar columns, which is not
           // something processInternalSortNulls() method should be doing.
           // This new code (computing avg size) will be moved from
           // processInternalSortNulls() to a separate new method in the future.
           retcode = processInternalSortNulls(cursor->rowsetSize(), singleGroup);
           HSHandleError(retcode);
           Lng32 rowsetSize = (Lng32)MINOF(MAX_ROWSET, rowsLeft);
           if (rowsetSize > 0)
             retcode = cursor->setRowsetPointers(singleGroup,rowsetSize);
         }
     }

   // Deallocate buffer uncompacted varchars were read into prior to being compacted.
   HSColGroupStruct* group = singleGroup;
   while (group != NULL)
   {
     if (group->state == PENDING && group->isCompacted())
     {
       NADELETEBASIC((short*)(group->varcharFetchBuffer), STMTHEAP);
       group->varcharFetchBuffer = NULL;
     }
     group = group->next;
   }

   if (retcode < 0) HSHandleError(retcode) else retcode=0; // Set to 0 for warnings.

   // some post-reading to memory processing to support MC in-memory computation
   if ( performISForMC() )
   {
      group = singleGroup;
      do
      {
         if (group->state == PENDING)
         {
            // free memory used by null bitmap if column has no null values
            if ((group->mcis_nullIndBitMap != NULL) && (group->nullCount == 0))
            {
               NADELETEBASIC(group->mcis_nullIndBitMap, STMTHEAP);
               group->mcis_nullIndBitMap = NULL;
            }

            // in case we are reading this column again to memory after it was already
            // sorted because of MC IS, we need to set the group as PROCESSED so
            // it does not go through the sorting logic again
            if (group->mcis_readAsIs == TRUE)
               group->state = PROCESSED;
         }
      }
      while (group = group->next);

   }

   if (LM->LogNeeded())
     {
       LM->StopTimer();

       char intStr[30];
       convertInt64ToAscii(sampleRowCount, intStr);
       sprintf(LM->msg, "HSGlobalsClass::readColumnsIntoMem(): %s rows read in", intStr);
       LM->Log(LM->msg);
     }

   LM->Log("...done fetching rowsets");
   return retcode;
 }

 // Invoke the quicksort template instantiation for the passed column's type.
 // Return code: 0 on success, -1 on failure.
 Lng32 doSort(HSColGroupStruct *group)
 {
   Lng32 retcode = 0;
   char errtxt[100]={0};
   HSErrorCatcher errorCatcher(retcode, - UERR_INTERNAL_ERROR, errtxt, TRUE);

   HSLogMan *LM = HSLogMan::Instance();

   // Sort routine can't handle empty array. group->data is ptr to start of
   // array of values, group->nextData points to 1st address after end of array.
   // If group->nextData is not greater, there is no data to sort.
   if (group->nextData <= group->data)
     return retcode;

   // Initiate sort for specific type by calling the quicksort template function.
   recDepth = maxRecDepth = 0;

   if (LM->LogNeeded())
     {
       sprintf(LM->msg, "Do quicksort for column %s",
                        group->colSet[0].colname->data());
       LM->StartTimer(LM->msg);
     }

   switch (group->ISdatatype)
     {
       case REC_BIN8_SIGNED:
         quicksort((Int8*)group->data, 0,
                   (Int8*)group->nextData - (Int8*)group->data - 1);
         break;

       case REC_BOOLEAN:
       case REC_BIN8_UNSIGNED:
         quicksort((UInt8*)group->data, 0,
                   (UInt8*)group->nextData - (UInt8*)group->data - 1);
         break;

       case REC_BIN16_SIGNED:
         quicksort((short*)group->data, 0,
                        (short*)group->nextData - (short*)group->data - 1);
         break;

       case REC_BIN16_UNSIGNED:
         quicksort((unsigned short*)group->data, 0,
                        (unsigned short*)group->nextData - (unsigned short*)group->data - 1);
         break;

        case REC_BIN32_SIGNED:
         quicksort((Int32*)group->data, 0,
                        (Int32*)group->nextData - (Int32*)group->data - 1);
         break;

       case REC_BIN32_UNSIGNED:
         quicksort((UInt32*)group->data, 0,
                        (UInt32*)group->nextData - (UInt32*)group->data - 1);
         break;

       case REC_BIN64_SIGNED:
         quicksort((Int64*)group->data, 0,
                        (Int64*)group->nextData - (Int64*)group->data - 1);
         break;

       case REC_BIN64_UNSIGNED:
         quicksort((UInt64*)group->data, 0,
                        (UInt64*)group->nextData - (UInt64*)group->data - 1);
         break;

       case REC_IEEE_FLOAT32:
         quicksort((float*)group->data, 0,
                        (float*)group->nextData - (float*)group->data - 1);
         break;

       case REC_IEEE_FLOAT64:
         quicksort((double*)group->data, 0,
                        (double*)group->nextData - (double*)group->data - 1);
         break;

       case REC_BYTE_F_ASCII:
       case REC_BYTE_F_DOUBLE:
       {
         //
         // Set the GLOBAL ISFixedChar instance with this column's values
         //
         ISFixedChar::setLength(group->ISlength);
         ISFixedChar::setCaseInsensitive(group->colSet[0].caseInsensitive == 1);
         ISFixedChar::setColCollation(group->colSet[0].colCollation);
         ISFixedChar::setCharSet(group->colSet[0].charset);

         // Allocate buffers for operator == and operator < to use.
         Int16 nPasses = CollationInfo::getCollationNPasses(group->colSet[0].colCollation);
         Int32   encodeKeyBufLen = group->ISlength * nPasses + 2 + nPasses;

         if ( encodeKeyBufLen > lengthOfSortBufrs )
         {
            //free memory for smaller buffers
            if ( sortBuffer1 ) NADELETEBASIC(sortBuffer1, STMTHEAP);
            if ( sortBuffer2 ) NADELETEBASIC(sortBuffer2, STMTHEAP);

            sortBuffer1 = new (STMTHEAP) char[encodeKeyBufLen];
            sortBuffer2 = new (STMTHEAP) char[encodeKeyBufLen];
            lengthOfSortBufrs = encodeKeyBufLen ;
         }

         quicksort((ISFixedChar*)group->data, 0,
                        (ISFixedChar*)group->nextData - (ISFixedChar*)group->data - 1);
         break;
       }
       case REC_BYTE_V_ASCII:
       case REC_BYTE_V_DOUBLE:
       {
         //
         // Set the GLOBAL ISVarChar instance with this column's values
         //
         ISVarChar::setCaseInsensitive(group->colSet[0].caseInsensitive == 1);
         ISVarChar::setColCollation(group->colSet[0].colCollation);
         ISVarChar::setCharSet(group->colSet[0].charset);

         Int16 nPasses = CollationInfo::getCollationNPasses(group->colSet[0].colCollation);
         Int32 encodeKeyBufLen = group->ISlength * nPasses + 2 + nPasses;

         if ( encodeKeyBufLen > lengthOfSortBufrs )
         {
            //free memory for smaller buffers
            if ( sortBuffer1 ) NADELETEBASIC(sortBuffer1, STMTHEAP);
            if ( sortBuffer2 ) NADELETEBASIC(sortBuffer2, STMTHEAP);

            sortBuffer1 = new (STMTHEAP) char[encodeKeyBufLen];
            sortBuffer2 = new (STMTHEAP) char[encodeKeyBufLen];
            lengthOfSortBufrs = encodeKeyBufLen ;
         }

         quicksort((ISVarChar*)group->data, 0,
                        (ISVarChar*)group->nextData - (ISVarChar*)group->data - 1);
         break;
       }
       default:
         sprintf(errtxt, "doSort(): unknown type %d", group->ISdatatype);
         sprintf(LM->msg, "INTERNAL ERROR: %s", errtxt);
         LM->Log(LM->msg);
         retcode = -1;
         HSHandleError(retcode);
         break;
     }

   if (LM->LogNeeded())
     {
       LM->StopTimer();
       sprintf(LM->msg, "Maximum recursion depth for %s was %d",
               group->colSet[0].colname->data(), maxRecDepth);
       LM->Log(LM->msg);
     }
   if (recDepth != 0)
     {
       sprintf(errtxt, "doSort(): Recursion depth should be 0.");
       sprintf(LM->msg, "INTERNAL ERROR: %s", errtxt);
       LM->Log(LM->msg);
       retcode = -1;
       HSHandleError(retcode);
     }
   return retcode;
 }

 /************************************************/
 /* METHOD: sortByColInMem()                     */
 /* PURPOSE: Iterate through single-column group */
 /*          list, and call doSort() for those   */
 /*          that are in the current internal    */
 /*          sort batch.                         */
 /* RETURN CODE: 0 on success, -1 on failure.    */
 /************************************************/
 Lng32 HSGlobalsClass::sortByColInMem()
 {
   HSColGroupStruct *group = singleGroup;
   HSLogMan *LM = HSLogMan::Instance();
   Lng32 retcode = 0;

   // Sort for each column
   LM->StartTimer("Do internal sort for single-columns");
   do
   {
     if (group->state == PENDING)
       {
         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "sortByColInMem: Starting sort of column %s",
                              group->colSet[0].colname->data());
             LM->Log(LM->msg);
           }

         (void)getTimeDiff(TRUE);
         retcode = doSort(group);
         HSHandleError(retcode);
         checkTime("after sorting column in memory");
         group->colSecs = getTimeDiff();  // saved for automation

         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "sortByColInMem: Finished sort of column %s",
                              group->colSet[0].colname->data());
             LM->Log(LM->msg);
           }
       }
   } while (group = group->next);
   LM->StopTimer();
   return retcode;
 }


 // Do an in-place quicksort on sortArr, using wide pivots.
 //
 template <class T>
 void quicksort(T* sortArr, Int64 lowInx, Int64 highInx)
 {
   Int64 pivotWidth;
   Int64 pivotInx = (Int64)(lowInx + ((double)rand() / RAND_MAX) * (highInx - lowInx));

   recDepth++;
   if (recDepth > maxRecDepth)
     maxRecDepth = recDepth;

   pivotInx = placeWidePivot(sortArr, lowInx, highInx,
                             pivotInx, pivotWidth);

   if (pivotInx > -1)
     {
       if (lowInx < pivotInx - 1)
         quicksort(sortArr, lowInx, pivotInx - 1);
       if (pivotInx + pivotWidth < highInx)
         quicksort(sortArr, pivotInx + pivotWidth, highInx);
     }
   recDepth--;
 }

 // The wide pivot optimization implemented by this function correctly places
 // all instances of the designated pivot value in a single call. Instances of
 // the pivot value are counted as they are encountered, and overwritten with
 // a value from the end of the list. When the correct location for the pivot
 // value is found, the n occurrences of the value are written starting at
 // that location, after moving the current data at those locations to the
 // vacated spots at the end of the list.
 //
 // Input params:
 //   sortArr -- array of values containing data to sort.
 //   lowInx, highInx -- boundaries of the portion of the array to sort.
 //   pivotInx -- index within the array of the value to use as the pivot.
 // Output param:
 //   pivotWidth -- number of occurrences of the pivot value found and placed.
 // Return value:
 //   The index at which the first of the pivot values was placed.
 //
 template <class T>
 Int64 placeWidePivot(T* sortArr, Int64 lowInx, Int64 highInx, Int64 pivotInx,
                      Int64& pivotWidth)
 {
   // endPtr is the final position in the array, while lastPtr is the position
   // of the rearmost value that hasn't been used to overwrite an instance of
   // the pivot value. It is decremented each time a value from the end is
   // moved to the location of a discovered pivot instance.
   T *endPtr, *currPtr, *storePtr, *lastPtr;
   T temp;

   // Can't use reference for this, because the array element will be moved.
   // Have to use copy ctor to create new object for char wrapper classes.
   const T pivot = sortArr[pivotInx];

   pivotWidth = 0;
   currPtr = sortArr + lowInx;
   storePtr = sortArr + lowInx;
   lastPtr = sortArr + highInx;
   while (currPtr <= lastPtr)
     {
       // If the next value to look at is less than the pivot, swap it with the
       // value at storePtr and increment storePtr -- all items prior to storePtr
       // must be less than the pivot value.
       if (*currPtr < pivot)
         {
           temp = *storePtr;
           *storePtr = *currPtr;
           *currPtr = temp;
           storePtr++;
           currPtr++;
         }
       else if (*currPtr == pivot)
         {
           // Don't increment currPtr here, we need to check the new value moved into
           // this location. Just increment the number of pivot values found, and
           // overwrite it with the value at the end of the list.
           pivotWidth++;
           *currPtr = *lastPtr--;
         }
       else
         currPtr++;
     }

   // All values have been checked, and all those less than the pivot value have
   // been moved to the left of storePtr, so storePtr is the location we want to
   // start writing the pivotWidth instances of the pivot value. First, we clear
   // space for the pivotWidth values starting at storePtr to the locations we
   // previously vacated at the end of the array.
   endPtr = sortArr + highInx;
   currPtr = MINOF(lastPtr, storePtr+pivotWidth-1);
   while (currPtr >= storePtr)
     {
       *endPtr-- = *currPtr--;
     }

   // Now move the pivot value instances into the proper location, which begins
   // at storePtr.
   currPtr = storePtr;
   while (currPtr < storePtr+pivotWidth)
     {
       *currPtr++ = pivot;
     }

   // If pivotWidth == high - low, then there is a single non-pivot value, but
   // it is guaranteed to be in the right place, so we still return -1 to
   // indicate no further recursion is required for this partition.
   //
   return (pivotWidth >= highInx - lowInx ? -1 : storePtr - sortArr);
 }


 template <class T>
 void checkForBackwardness(HSGlobalsClass * hsGlobals, HSColGroupStruct *group, T * listitem1, T * listitem2)
 {
   if (*listitem1 > *listitem2)
     {
       group->backwardWarningCount++;
       if (group->backwardWarningCount < 5)  // report this warning at most 5 times per column
         {
           // raise a warning that we found data in backwards order, which means
           // we might get out-of-order histograms, which is bad
           hsGlobals->diagsArea << DgSqlCode(UERR_UNEXPECTED_BACKWARDS_DATA)
                << DgString0(group->colSet[0].colname->data());
         }
     }
 }

 template < >
 void checkForBackwardness(HSGlobalsClass * hsGlobals, HSColGroupStruct *group,
 MCWrapper * listitem1, MCWrapper * listitem2)
 {
   // TODO: write this method when necessary; it's a no-op for now
 }


 // The data in the column's data array has been sorted but not grouped.
 // Iterate over the values, counting duplicates. When a new value is
 // encountered, create a new group, consisting of a distinct value and
 // its occurrence count. An initial number of groups must be stored up
 // until we have enough to apply the required adjustment to the number of
 // intervals to use. Ultimately, each group (value/count pair) is passed to
 // addIntervalData() to be incorporated into an interval.
 // The purpose of dummyPtr is to cause the correct template instantiation
 // to be used.
 //
 template <class T>
 void createHistogram(HSColGroupStruct *group, Lng32 numIntervals,
                      Int64 estRowCount, NABoolean usingSample, T* dummyPtr)
 {
   Int64 numValues = (T*)group->nextData - (T*)group->data;
   NABoolean singleIntervalPerUec = FALSE;
   NABoolean allGroupsSeen = FALSE;
   Int32 adjustedIntervalCount;
   Lng32 numGapIntervals;
   Lng32 numHighFreqIntervals;

   // This should not happen unless all the rows in the table are deleted between
   // the time we check the row count and the time the data is actually read, but
   // we have to watch out for it or an infinite loop could occur.
   if (numValues <= 0)
     return;

   HSGlobalsClass *hsGlobals = GetHSContext();
   boundarySet<T> *distinctValues = new (STMTHEAP) boundarySet<T>;

   T *listitem1 = (T*)group->data;  // adjacent sorted values
   T *listitem2 = listitem1+1;
   Int32 numRows;                     // number of rows with same value
   Int64 valueIndex = 0;            // index of current raw value
   Int32 valueCountIndex;             // index of distinct (grouped) value

   NABoolean firstRowset = TRUE;
   double gapAvgSoFar = 0;
   Int64 gapCountSoFar = 0;
   double gapMultiplier = 0;
   NABoolean bigGap;
   double currGapMagnitude;

 do
  {
   valueCountIndex = 0;
   numRows = 0;
   while (valueCountIndex < MAX_ROWSET && valueIndex < numValues-1)
     {
       numRows++;
       if (*listitem1 != *listitem2)
         {
           // Do this for each distinct value.
           distinctValues->data[valueCountIndex] = *listitem1;
           distinctValues->dataSum[valueCountIndex] = numRows;
           distinctValues->nullInd[valueCountIndex] = 0; // nulls already handled
           valueCountIndex++;
           numRows=0;
         }
       checkForBackwardness(hsGlobals,group,listitem1,listitem2);
       listitem1++;
       listitem2++;
       valueIndex++;
     }

   // If we exited the above loop because we ran out of values before running out
   // of room for them, add the final distinct value to the list (loop exits with
   // 1 distinct value outstanding). In the rare case that the number of distinct
   // values is a multiple of MAX_ROWSET, and the final distinct value is a
   // singleton, both conditions of the above loop become true at the same time,
   // and the following 'if' will be false. In that case there is a single value
   // with a single occurrence left, and it will be handled in the next iteration
   // of the do...while(!allGroupsSeen) loop. In that final iteration, the while
   // loop above will not be entered because the condition on valueIndex is still
   // true, and the 'if' below will be true this time and process the final value.
   if (valueIndex == numValues - 1 && valueCountIndex < MAX_ROWSET)
     {
       numRows++;
       distinctValues->data[valueCountIndex] = *listitem1;
       distinctValues->dataSum[valueCountIndex] = numRows;
       distinctValues->nullInd[valueCountIndex] = 0; // nulls already handled
       valueCountIndex++;
       allGroupsSeen = TRUE;
     }

   distinctValues->size = valueCountIndex;

   // Need to determine that we have a certain number of intervals before the
   // interval count can be adjusted, and have to have the adjusted interval
   // count before instantiating the HSHistogram object. Do this only after
   // forming the first set of groups.
   if (firstRowset)
     {
       adjustedIntervalCount =
           hsGlobals->getAdjustedIntervalCount(group,
                                               numIntervals,
                                               estRowCount,
                                               valueCountIndex,
                                               singleIntervalPerUec,
                                               numGapIntervals,
                                               numHighFreqIntervals);

       // Now that we have adjusted the interval count, the HSHistogram can be
       // created.
       //@ZX add nullCount to non-null count to get same results as existing code,
       //    although we get a better dispersal of values by omitting nulls from the
       //    calculation (they go into their own interval).
       group->groupHist = new (STMTHEAP) HSHistogram(adjustedIntervalCount,
                                                numValues + group->nullCount,
                                                numGapIntervals,
                                                numHighFreqIntervals,
                                                usingSample,
                                                singleIntervalPerUec);
       gapMultiplier = group->groupHist->getGapMultiplier();
     }

   if (numGapIntervals > 0)
     profileGaps(group, distinctValues, gapAvgSoFar, gapCountSoFar,
                 firstRowset);

   firstRowset = FALSE;

   // Pass the buffered distinct value/count pairs one at a time to the function
   // that forms intervals, and then proceed reading values and forming groups. Note
   // that if the buffer contains all the groups (i.e., the sorted data was exhausted
   // while forming the buffered groups), we pass TRUE as the last argument to
   // addIntervalData() for the final group, since there will no more groups passed
   // to it below.
   for (Int32 i=0; i<valueCountIndex; i++)
     {
       currGapMagnitude = distinctValues->gapMagnitude[i];
       bigGap = currGapMagnitude > gapAvgSoFar * gapMultiplier
                   // high frequency intervals not considered as possible gap intervals
                && distinctValues->dataSum[i] <= group->groupHist->getHighFreqThreshold()
                && group->groupHist->gapKeeper_.insert(currGapMagnitude);
       group->groupHist->addIntervalData(distinctValues->data[i],
                                         group,
                                         distinctValues->dataSum[i],
                                         bigGap,
                                         currGapMagnitude,
                                         allGroupsSeen && i == valueCountIndex-1);
     }

  } while (!allGroupsSeen);

  hsGlobals->checkTime("after forming intervals for a single column");

  // Now that all distinct values and their frequencies have been seen, we
  // know the actual gap average. Revisit the gap intervals we created,
  // and keep the gapIntCount ones with the highest gap magnitude. The rest
  // are merged into adjacent intervals, unless that would create an interval
  // of excessive height.
  if (numGapIntervals > 0)
    group->groupHist->removeLesserGapIntervals(gapAvgSoFar);
 }


 // For each column in the current internal sort batch, call the template
 // function to create a histogram from the sorted column values.
 //
 // Parameters:
 //   rowsAllocated - Number of rows memory allocation was based on. Used to
 //                   delete object arrays allocated for the columns just
 //                   processed.
 // Return code: 0 on success, -1 on failure.
 //
 Lng32 HSGlobalsClass::createStats(Int64 rowsAllocated)
 {
   Lng32 retcode = 0;

   HSColGroupStruct *group = singleGroup;
   // Create histogram for each column in this batch (denoted by state=PENDING).
   do
   {
     if (group->state != PENDING)
       continue;

     retcode = createStatsForColumn(group, rowsAllocated);
     group->groupHist->logIntervals();

   } while (group = group->next);

   return retcode;
 }

 Lng32 HSGlobalsClass::createStatsForColumn(HSColGroupStruct *group, Int64 rowsAllocated)
 {
   Lng32 retcode = 0;
   char errtxt[100]={0};
   HSErrorCatcher errorCatcher(retcode, - UERR_INTERNAL_ERROR, errtxt, TRUE);

   HSLogMan *LM = HSLogMan::Instance();

   // Create histogram for the group.

     // Invoke template function to create histogram for the column's type.
     switch (group->ISdatatype)
     {
       case REC_BIN8_SIGNED:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (Int8*)NULL);
         break;

       case REC_BOOLEAN:
       case REC_BIN8_UNSIGNED:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (UInt8*)NULL);
         break;

        case REC_BIN16_SIGNED:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (short*)NULL);
         break;

       case REC_BIN16_UNSIGNED:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (unsigned short*)NULL);
         break;

      case REC_BIN32_SIGNED:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (Int32*)NULL);
         break;

       case REC_BIN32_UNSIGNED:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (UInt32*)NULL);
         break;

       case REC_BIN64_SIGNED:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (Int64*)NULL);
         break;

       case REC_BIN64_UNSIGNED:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (UInt64*)NULL);
         break;

       case REC_BYTE_F_ASCII:
       case REC_BYTE_F_DOUBLE:
         //
         // Set the GLOBAL ISFixedChar instance with this column's values
         //
         ISFixedChar::setLength(group->ISlength);
         ISFixedChar::setCaseInsensitive(group->colSet[0].caseInsensitive == 1);
         ISFixedChar::setColCollation(group->colSet[0].colCollation);
         ISFixedChar::setCharSet(group->colSet[0].charset);

         createHistogram(group, intCount, sampleRowCount, samplingUsed, (ISFixedChar*)NULL);
         break;

       case REC_BYTE_V_ASCII:
       case REC_BYTE_V_DOUBLE:
         //
         // Set the GLOBAL ISVarChar instance with this column's values
         //
         ISVarChar::setCaseInsensitive(group->colSet[0].caseInsensitive == 1);
         ISVarChar::setColCollation(group->colSet[0].colCollation);
         ISVarChar::setCharSet(group->colSet[0].charset);

         createHistogram(group, intCount, sampleRowCount, samplingUsed, (ISVarChar*)NULL);
         break;

       case REC_IEEE_FLOAT32:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (float*)NULL);
         break;

       case REC_IEEE_FLOAT64:
         createHistogram(group, intCount, sampleRowCount, samplingUsed, (double*)NULL);
         break;

       default:
         sprintf(errtxt, "createStats(): unknown type %d", group->ISdatatype);
         sprintf(LM->msg, "INTERNAL ERROR: %s", errtxt);
         LM->Log(LM->msg);
         retcode=-1;
         HSHandleError(retcode);
         break;
     }

     if (LM->LogNeeded())
       {
         sprintf(LM->msg, PF64 " nulls found for column %s", group->nullCount,
                          group->colSet[0].colname->data());
         LM->Log(LM->msg);
       }
     if (group->nullCount)
     {
       // If the column has all NULLs, then groupHist will not have been allocated
       // in the call to CreateHistogram.  So, create it here.
       if (!group->groupHist)
         group->groupHist = new(STMTHEAP) HSHistogram(intCount,
                                                      group->nullCount,
                                                      0, // numGapIntervals
                                                      0, // numHighFreqIntervals
                                                      samplingUsed,
                                                      FALSE //singleIntervalPerUec
                                                     );
       group->groupHist->addNullInterval(group->nullCount, group->colCount);
     }

     // Upscale rowcounts and estimate UECs when sampling.
     if (samplingUsed && sampleRowCount > 0 && actualRowCount > sampleRowCount)
     {
       retcode = FixSamplingCounts(group);
       HSHandleError(retcode);
       if (group->groupHist) group->groupHist->deleteFiArray();
     }

     // This is the final step of processing for internal sort, so mark the
     // column as PROCESSED so it won't be considered again.
     group->state = PROCESSED;

     // Free up the column's data now that we're done with it. This would be
     // done for us at end of statement, but we are dealing with large lumps
     // of memory for possibly long times, so we free it as soon as possible.
     // If MC IS is ON and there are MCs using this column, then keep this
     // column in memory until all MCs using it are properly computed
     if ( !HSGlobalsClass::performISForMC() || group->mcs_usingme == 0)
     {
        group->freeISMemory(TRUE,(group->mcs_usingme==0));
     }

   return retcode;
 }

 /************************************************/
 /* METHOD:  log()                               */
 /* PURPOSE: Write selected information to the   */
 /*          log. This is done for just-in-time  */
 /*          to collect information about the    */
 /*          Update Stats statement at the point */
 /*          of failure.                         */
 /* INPUT:   Pointer to the Log Manager instance.*/
 /************************************************/
 void HSGlobalsClass::log(HSLogMan* LM)
 {
   // Show table stats are being collected for.
   sprintf(LM->msg, "Updating stats for table %s",
                     objDef->getObjectFullName().data());
   LM->Log(LM->msg);

   // Show actual and sample row counts.
   sprintf(LM->msg, "Actual row count = " PF64, actualRowCount);
   LM->Log(LM->msg);
   sprintf(LM->msg, "Sample row count = " PF64, sampleRowCount);
   LM->Log(LM->msg);

   // Whether or not the statement is due to a request from compiler.
   sprintf(LM->msg, "Requested by compiler?: %s",
                    requestedByCompiler ? "Yes" : "No");
   LM->Log(LM->msg);

   // Show the single-column groups this statement is creaing histograms for.
   HSColGroupStruct* group = singleGroup;
   LM->Log("\nSingle-Column Groups");
   LM->Log(  "--------------------");
   while (group)
     {
       if (group->colNames)
         LM->Log(group->colNames->data());
       else
         LM->Log("<name not available>");
       group = group->next;
     }

   // Show the multi-column groups this statement is creaing histograms for.
   group = multiGroup;
   LM->Log("\nMulti-Column Groups");
   LM->Log(  "-------------------");
   while (group)
     {
       if (group->colNames)
         {
           sprintf(LM->msg, "(%s)", group->colNames->data());
           LM->Log(LM->msg);
         }
       else
         LM->Log("<names not available>");
       group = group->next;
     }
 }

 NABoolean HSGlobalsClass::wherePredicateSpecifiedForIUS()
 {
   return  optFlags & IUS_OPT;
 }

 NAString& HSGlobalsClass::getWherePredicateForIUS()
 {
    if (ius_where_condition_text == NULL)
       ius_where_condition_text = new(CTXTHEAP) NAString("");

    return (*ius_where_condition_text);
 }

 // Return the following string in the queryText parameter:
 // delete from <smplTable> where <whereCondition>
 void HSGlobalsClass::generateIUSDeleteQuery(const NAString& smplTable,
                                             NAString& queryText,
                                             NABoolean transactional)
 {
   if (transactional)
     queryText = "DELETE FROM ";
   else
     queryText = "DELETE WITH NO ROLLBACK FROM ";

   queryText.append(smplTable.data());

   NAString& whereClause = getWherePredicateForIUS();
   if (whereClause.length() > 0) {
     queryText.append(" WHERE ");
     queryText.append(whereClause);
   }
 }

 // Create statement to add rows to the IUS persistent sample table.
 //   upsert using load into into <smplTable>...
 //
 // If doing full IUS, the new sample table rows are already in the temporary
 // _I table, and the source for the upsert is
 //       (select * from <smplTable>_I)
 //
 // If a limited IUS (update persistent sample table and generate histograms),
 // the source for the upsert is the source table with the IUS where predicate
 // and sampling rate applied:
 //       (select * from <sourceTable> where <predicate> sample random <sampleRate> percent)
 void HSGlobalsClass::generateIUSSelectInsertQuery(const NAString& smplTable,
                                                   const NAString& sourceTable,
                                                   NAString& queryText)
 {
   queryText.append("UPSERT USING LOAD INTO "); // for algorithm 1
   queryText.append(smplTable.data());
   queryText.append(" (SELECT ");

   // Generate the select list. Truncate any over-long char/varchar columns
   // by using SUBSTRING calls. Omit any LOB columns.
   objDef->addTruncatedSelectList(queryText);

   queryText.append(" FROM ");

   if (CmpCommon::getDefault(USTAT_INCREMENTAL_UPDATE_STATISTICS) == DF_ON)
     {
       queryText.append(smplTable.data());
       queryText.append("_I)");
     }
   else
     {
       queryText.append(sourceTable.data());
       queryText.append(" where ");
       queryText.append(getWherePredicateForIUS());
       NAString sampleOpt;
       createSampleOption(SAMPLE_RAND_1,
                          sampleRateAsPercetageForIUS * 100.0,
                          sampleOpt, 0, 0);
       queryText.append(sampleOpt);
       queryText.append(")");
     }
 }

 NABoolean HSGlobalsClass::okToPerformIUS()
 {
   return CmpCommon::getDefault(USTAT_INCREMENTAL_UPDATE_STATISTICS) != DF_OFF;
 }

 // If IUS cqd is set to full ON position, use IUS to incrementally update
 // histograms as well as persistent sample table.
 NABoolean HSGlobalsClass::useIUSForHistograms()
 {
   return CmpCommon::getDefault(USTAT_INCREMENTAL_UPDATE_STATISTICS) == DF_ON;
 }

 NABoolean HSGlobalsClass::getPersistentSampleTableForIUS(NAString& tableName,
                 Int64 &requestedRows, Int64 &sampleRows, double &sampleRate,
                 NABoolean forceToFetch)
 {
   HSLogMan *LM = HSLogMan::Instance();
   LM->StartTimer("IUS: Read from persistent_samples table to find S(i-1)");

   if ( !okToPerformIUS() )
      return FALSE;

   // Fetch the IUS sample table name from SB_PERSISTENT_SAMPLES.

   HSPersSamples *sampleList = HSPersSamples::Instance(objDef->getCatName(),
                                                       objDef->getSchemaName());
   if ( !sampleList ) return FALSE;

   Lng32 retcode = sampleList->find(objDef, 'I', tableName,
                                    requestedRows, sampleRows, sampleRate
                                   );

   if ( retcode == 0 )
     sampleRate /= 100; // Remove the 100x factor from the percent rate value
                        // obtained from the persistent_samples meta-data table.

   LM->StopTimer();

   return ( retcode == 0 && tableName.length() > 0);
 }

 template <class T>
 double delta(T x, T y)
 {
   return (double)((x>y) ? x-y : y-x);
 }

 template <class T>
 T HSGlobalsClass::convertToISdatatype(T* dummy,  // just so compiler can instantiate
                                       const HSDataBuffer& valToConvert,
                                       HSColGroupStruct* group)
 {
   // First check for empty source value. This happens for MFV of 1st interval.
   if (valToConvert.length() == 0)
     return 0;

   T val;
   HSColumnStruct& col = group->colSet[0];
   ComDiagsArea diagsArea;
   ComDiagsArea *diagsAreaPtr = &diagsArea;
   Lng32 dataConversionErrorFlag;
   if (col.datatype == REC_DATETIME ||
       col.datatype >= REC_MIN_INTERVAL && col.datatype <= REC_MAX_INTERVAL)
     {
       // We have to extract the body of the datetime or interval literal value,
       // because convDoIt() doesn't handle the full literal syntax. For example,
       // if the value is INTERVAL'20 6' YEAR TO MONTH, we have to pass "20 6".
       // convDoIt() doesn't produce datetime values that are consistent with the
       // internal representation we use for IS/IUS, so we have specialized
       // conversions for those.
       NAWchar* litBodyPtr = (NAWchar*)(valToConvert.data());
       NAWchar* lastCharPtr = litBodyPtr + valToConvert.numChars() - 1;
       while (litBodyPtr < lastCharPtr && *litBodyPtr != L'\'')
         litBodyPtr++;
       HS_ASSERT(litBodyPtr < lastCharPtr);
       litBodyPtr++;  // Go past single quote to 1st char of literal body
       while (lastCharPtr > litBodyPtr && *lastCharPtr != L'\'')
         lastCharPtr--;
       HS_ASSERT(lastCharPtr > litBodyPtr);

       if (col.datatype == REC_DATETIME)
         {
           // Datetime returned by convDoIt is a sequence of integer fields: 2 bytes
           // for year. 1 each for month/day/hour/minute/second, 4 bytes for
           // fractional precision.
           static const Int16 DFV_LEN = sizeof(Int16) + 5 + sizeof(Int32);
           char dateFieldValues[DFV_LEN];
           const char* dfvPtr = dateFieldValues;
           convDoIt((char*)litBodyPtr, (lastCharPtr - litBodyPtr) * sizeof(NAWchar), REC_BYTE_F_DOUBLE, 0, 0,
                   dateFieldValues, DFV_LEN, col.datatype, col.precision, col.scale,
                   NULL, 0, STMTHEAP, &diagsAreaPtr,
                   CONV_UNKNOWN,
                   &dataConversionErrorFlag,
                   0);
           switch (col.precision)
             {
               case REC_DTCODE_DATE:
                 {
                   // Copy year to ensure alignment.
                   Int16 year;
                   memcpy(&year, dfvPtr, sizeof year);
                   val = (T)ExpDatetime::getTotalDays(year,
                                                      *(dfvPtr+2),  // month
                                                      *(dfvPtr+3)); // day
                 }
                 break;
               case REC_DTCODE_TIME:
                 {
                   val = (T)(*dfvPtr * 3600 +
                             *(dfvPtr+1) * 60 +
                             *(dfvPtr+2));
                   if (col.scale)
                     {
                       // Copy fractional seconds field for alignment.
                       Int32 fracSec;
                       val *= (T)pow(10, col.scale);
                       memcpy(&fracSec, dfvPtr+3, sizeof fracSec);
                       val += fracSec;
                     }
                 }
                 break;
               case REC_DTCODE_TIMESTAMP:
                 val = (T)DatetimeType::julianTimestampValue
                                          (dfvPtr,
                                           col.scale ? DFV_LEN : DFV_LEN - sizeof(Int32),
                                           col.scale);
                 break;
               default:
                 HS_ASSERT(FALSE);
                 break;
             }
         }
       else  // an interval of some sort
         {
           convDoIt((char*)litBodyPtr, (lastCharPtr - litBodyPtr) * sizeof(NAWchar), REC_BYTE_F_DOUBLE, 0, 0,
                   (char*)&val, sizeof(T), col.datatype, col.precision, col.scale,
                   NULL, 0, STMTHEAP, &diagsAreaPtr,
                   CONV_UNKNOWN,
                   &dataConversionErrorFlag,
                   0);
         }
     }
   else if (col.datatype >= REC_MIN_DECIMAL && col.datatype <= REC_MAX_DECIMAL ||
            col.datatype >= REC_MIN_BINARY && col.datatype <= REC_MAX_BINARY)    //scale > 0, per caller
     {
       // The fractional part has been normalized to use the full number of scale
       // digits (e.g., 123.1 is represented as "123.100" for a Numeric(6,3)).
       T intPart = 0, fracPart = 0;
       if (sizeof(T) > 4)
         na_swscanf((const NAWchar*)valToConvert.data(), L"%lld.%lld", &intPart, &fracPart);
       else
         na_swscanf((const NAWchar*)valToConvert.data(), L"%d.%d", &intPart, &fracPart);
       val = intPart * (T)pow(10, col.scale) + fracPart;
     }
   else
     {
       // Don't know about this type -- should have been detected earlier and not
       // used with IUS.

       HS_ASSERT(FALSE);
     }

   return val;
 }

 Int32 computeKeyLengthInfo(Lng32 datatype)
 {
   // Only need to handle types used for IS/IUS. Datetime/interval types and
   // non-integral fixed numerics are all converted to one of these types.
   switch (datatype)
     {
       case REC_BIN16_SIGNED:
       case REC_BPINT_UNSIGNED:
       case REC_BIN16_UNSIGNED:
         return ExHDPHash::SWAP_TWO;

       case REC_BIN32_SIGNED:
       case REC_BIN32_UNSIGNED:
       case REC_FLOAT32:
         return ExHDPHash::SWAP_FOUR;

       case REC_BIN64_SIGNED:
       case REC_BIN64_UNSIGNED:
       case REC_FLOAT64:
         return ExHDPHash::SWAP_EIGHT;

       default:
         return ExHDPHash::NO_FLAGS;
     }
     return ExHDPHash::NO_FLAGS;
 }

 template <class T>
 void IUSValueIterator<T>::init(HSColGroupStruct* group)
 {
   // Strings must be contiguous in the strData buffer for this iterator to
   // work correctly.
   HS_ASSERT(group->strDataConsecutive);
   vp = (T*)group->data;
 }

 template <class T>
 Int32 HSGlobalsClass::processIUSColumn(T* ptr,
                       const NAWchar* format,
                       HSColGroupStruct* smplGroup,
                       HSColGroupStruct* delGroup,
                       HSColGroupStruct* insGroup)
 {
   Int32 retcode = 0;
   HSHistogram* hist = smplGroup->groupHist;
   Lng32 numIntervals = hist->getNumIntervals();
   Lng32 numNonNullIntervals = hist->hasNullInterval()
                                     ? numIntervals - 1
                                     : numIntervals;

   // If the existing histogram is all nulls, and the incremental sample contains
   // any non-nulls, fall back to RUS so new intervals can be created correctly.
   if (numNonNullIntervals == 0 &&
       iusSampleInsertedInMem->getNumRows() > insGroup->nullCount)
     return UERR_WARNING_IUS_NO_LONGER_ALL_NULL;

   Int64 insertFailCount = 0;   // count attempted insertions into CBF that fail
   char title[100];

   HSLogMan *LM = HSLogMan::Instance();

   if (LM->LogNeeded()) {
      sprintf(LM->msg, "IUS: process column %s", smplGroup->colSet[0].colname->data());
      LM->StartTimer(LM->msg);
   }

   // Allocate and initialize 2 arrays: array of type-specific boundary
   // values to use for binary search for the interval that values
   // belong to, and array of MFV values. Include a space for the special
   // low-value interval (index 0).
   T* boundaryValues = new T[numNonNullIntervals+1];
   T* MFVValues = new T[numNonNullIntervals+1];

   // transfer to group for potential use within mergeDatasetsforIUS().
   smplGroup->boundaryValues = boundaryValues;
   smplGroup->MFVValues = MFVValues;

   // If there is only a null interval, the code to extract the boundary value of
   // the low-value interval (which is "(NULL)") as an instance of type T will get
   // an assertion failure. So skip this code if that is the case; these 2 arrays
   // won't be used anyway in this case because the existing sample contains no
   // non-nulls to place in intervals, and if there are non-nulls in _I, we will
   // have returned above with UERR_WARNING_IUS_NO_LONGER_ALL_NULL.
   if (numNonNullIntervals > 0) {
     for (Lng32 i=0; i<=numNonNullIntervals; i++) {
         convertBoundaryOrMFVValue(hist->getIntBoundary(i), smplGroup,
                                   i, boundaryValues, format);
         convertBoundaryOrMFVValue(hist->getIntMFV(i), smplGroup,
                                   i, MFVValues, format);
     }
   }

   if (LM->LogNeeded()) {
      LM->StopTimer();
   }

   // Use a counting bloom filter to track frequency information. Call its
   // insert() method for each value of smplGroup and insGroup, and its
   // remove() method for each value of delGroup.
   //

   // TBD: to get the max std deviation u of frequency per interval and use
   // it to adjust the average frequency of keys with low frequency as
   // totalRC / totalUec + 3*u, to be used as the 4th argument in CBF cstr below.
   // for ( Lng32 idx = 1; idx<numIntervals; idx++ ) {
   // }


   float false_prob = (float)CmpCommon::getDefaultNumeric(USTAT_INCREMENTAL_FALSE_PROBABILITY);

   if (LM->LogNeeded()) {
      sprintf(title, "IUS: setup CBF");
      LM->StartTimer(title);
   }

    Lng32 maxHashsToUse =
     (ActiveSchemaDB()->getDefaults()).getAsLong(USTAT_IUS_MAX_NUM_HASH_FUNCS);


    CountingBloomFilter* cbf = smplGroup->cbf;

   Int64 rowInx;

   // Create a value iterator that encapsulates the necessary operations to
   // manipulate the values being processed in the in-memory table, and move
   // from one to the next regardless of the underlying column type.
   IUSValueIterator<T> valIter(ptr);

   Int32 intervalIdx;

   // Array to use for interval counts as a backup to CBF (only used for logging
   // and comparison to CBF counts).
   Int64* intvlRC = NULL;
   if (LM->LogNeeded()) {
     intvlRC = new(STMTHEAP) Int64[numNonNullIntervals+1];
     memset(intvlRC, 0, sizeof(Int64) * (numNonNullIntervals+1));
   }

   if ( cbf == NULL ) {


       smplGroup->cbf = cbf = new (STMTHEAP)
                 CountingBloomFilterWithKnownSkews(
                         STMTHEAP,

                         (UInt32)maxHashsToUse,

                         // The expected number of distinct keys
                         // @WARN: Have to do a narrowing cast here, ctor only takes a UInt32.
                         MAXOF(
                             (UInt32)(sampleRowCount -
                                      iusSampleDeletedInMem->getNumRows() +
                                      iusSampleInsertedInMem->getNumRows()),
                              UInt32(hist->getTotalUec())
                              ),

                         // probability of false positives, from CQD
                         false_prob,

                         // averate frequency plus the max stddev times 3
                         UInt32(hist->getTotalRowCount() / hist->getTotalUec())
                             + 3*UInt32(ceil(hist->getMaxStddev())),

                         // expected # of keys with high frequency.
                         // +1 so that interval# maps to bucket# directly
                         (UInt32)((hist->getNumIntervals()+1) * 2),

                         // # of intervals that keys mapped to.
                         // +1 so that interval# maps to bucket# directly
                         hist->getNumIntervals()+1
                                                  );


      if ( LM->LogNeeded() ) {
         sprintf(LM->msg, "currentSampleSize=" PF64 ", deleteSize=" PF64 ", insertSize=" PF64 " ",
                      sampleRowCount, iusSampleDeletedInMem->getNumRows(),
                      iusSampleInsertedInMem->getNumRows());
         LM->Log(LM->msg);

         cbf->setLogFile((char*)(LM->logFileName()->data()));
         logCBF("before forming S(i-1)", cbf);
      }


      cbf->setKenLengthInfo( computeKeyLengthInfo(smplGroup->ISdatatype) );

      if (LM->LogNeeded()) {
         sprintf(title, "IUS: insert into CBF " PF64 " keys for S(i-1)", sampleRowCount);
         LM->StartTimer(title);
      }

      // Insert into cbf the values from the column in the sample table
      Int64 numSmplRows = sampleRowCount;
      valIter.init(smplGroup);

      insertFailCount = 0;


      for (rowInx=1; rowInx<=numSmplRows - smplGroup->nullCount; rowInx++)
        {
          intervalIdx = findInterval(numNonNullIntervals, boundaryValues, valIter.val());
          if (intvlRC) intvlRC[intervalIdx]++;  // for logging


          CountingBloomFilter::INSERT_ENUM insert_status =
           cbf->insert((char*)valIter.dataRepPtr(), valIter.size(), intervalIdx,
                      (valIter.val() == MFVValues[intervalIdx]) ? cbf_key::MFV : cbf_key::NONE);


          // non-mfv value overflows to mfv. bail out.
          if (insert_status == CountingBloomFilter::NEW_MFV) {

 #if 0
            if ( LM->LogNeeded() ) {
                sprintf(LM->msg, "rowIdx=%d, interval=%d, valSize=%d", rowInx, intervalIdx, valIter.size());
                LM->Log(LM->msg);

                memcpy(LM->msg, (char*)valIter.dataRepPtr(), valIter.size());
                LM->Log(LM->msg);
                logCBF("after the above key, cbf is:", cbf);
             }
 #endif

             if (LM->LogNeeded()) {
               // only issue the warning if logging is turned on
               diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
                 << DgString0(smplGroup->colSet[0].colname->data());
               LM->Log("NONMFV overflow");
               LM->StopTimer();  // Need both of these; there are
               LM->StopTimer();  //   2 outstanding timer events
             }
             return UERR_IUS_INSERT_NONMFV_OVERFLOW;
          }


          // non-mfv value can not find a slot in CBF, record the failure and continue
          if (insert_status == CountingBloomFilter::NO_SLOT ||
              insert_status == CountingBloomFilter::PARAM_ERROR)
            insertFailCount++;

 #if 0
          if ( LM->LogNeeded() && intervalIdx  == 44 ) {
              sprintf(LM->msg, "key=%d, interval=%d", *(int*)valIter.dataRepPtr(), intervalIdx);
              LM->Log(LM->msg);
              logCBF("after the above key, cbf is:", cbf);
          }
 #endif

          valIter.next();

        }


      if (LM->LogNeeded()) {
         logCBF("after s(i-1) insertion , cbf is:", cbf);
         LM->StopTimer();
         if (insertFailCount > 0) {
            sprintf(LM->msg, "For S(i-1), " PF64 " failures out of " PF64 " CBF insertions.",
                    insertFailCount, numSmplRows - smplGroup->nullCount);
            LM->Log(LM->msg);
         }
      }

   } else { // cbf already exists
      // 1 more bucket than interval so that interval# maps to bucket# directly
      HS_ASSERT(cbf->numBuckets() == hist->getNumIntervals() + 1);
      cbf->setKenLengthInfo( computeKeyLengthInfo(smplGroup->ISdatatype) );
      sampleRowCount = cbf->totalFreqForAll();
   }

   //logCBF("after forming S(i-1)", cbf);


   if (LM->LogNeeded()) {
     LM->StopTimer();
   }


   Int64 numDelRows = 0;
   Int64 numInsRows = 0;

   //
   // Algorithm2:
   //
   // delete from cbf those values from the column in the deleted row inMem table

   numDelRows = iusSampleDeletedInMem->getNumRows();

   if (LM->LogNeeded()) {
      sprintf(title, "IUS: delete D from CBF (" PF64 " keys)", numDelRows);
      LM->StartTimer(title);
   }

   valIter.init(delGroup);
   for (rowInx=1; rowInx<=numDelRows - delGroup->nullCount; rowInx++)
     {


 /*
       if ( LM->LogNeeded() && strcmp(smplGroup->colSet[0].colname->data(), "COLSINT") == 0 ) {

 unsigned char* x = (unsigned char*)valIter.dataRepPtr();
 if ( x[0] == (unsigned char)255 && x[1] == (unsigned char)127 ) {
          sprintf(LM->msg, "key=%d, interval=%d", *(short*)valIter.dataRepPtr(), intervalIdx);
          LM->Log(LM->msg);
 }
       }
 */


       intervalIdx = findInterval(numNonNullIntervals, boundaryValues, valIter.val());
       if (intvlRC) intvlRC[intervalIdx]--;  // for logging
       cbf->remove((char*)valIter.dataRepPtr(), valIter.size(), intervalIdx,
                   (valIter.val() == MFVValues[intervalIdx]) ? cbf_key::MFV : cbf_key::NONE);

 #if 0
       if ( LM->LogNeeded() ) {
          sprintf(buf, "key=%d, interval=%d", *(int*)valIter.dataRepPtr(), intervalIdx);
          LM->Log(buf);
       }
 #endif

       valIter.next();
     }

   if (LM->LogNeeded()) {
     LM->StopTimer();
   }

   //logCBF("after deleting D", cbf);

   // Insert into cbf the values from the column in the inserted row inMem table
   numInsRows = iusSampleInsertedInMem->getNumRows();

   if (LM->LogNeeded()) {
      sprintf(title, "IUS: insert I into CBF (" PF64 " keys)", numInsRows);
      LM->StartTimer(title);
   }

   // An object to keep track of any new lowest and highest values so we
   // can tweak interval boundaries if need be
   HSHiLowValues<T> hiLowVal;

   insertFailCount = 0;
   valIter.init(insGroup);
   for (rowInx=1; rowInx<=numInsRows - insGroup->nullCount; rowInx++)
     {
       intervalIdx = findInterval(numNonNullIntervals, boundaryValues, valIter.val());
       if (intvlRC) intvlRC[intervalIdx]++;  // for logging

       hiLowVal.findHiLowValues(valIter.val());

       CountingBloomFilter::INSERT_ENUM insert_status =
           cbf->insert((char*)valIter.dataRepPtr(), valIter.size(), intervalIdx,
                      (valIter.val() == MFVValues[intervalIdx]) ? cbf_key::MFV : cbf_key::NONE);

       // non-mfv value overflows to mfv. bail out.
       if (insert_status == CountingBloomFilter::NEW_MFV) {
          if (LM->LogNeeded())
            {
              // only issue warning if logging is turned on
              diagsArea << DgSqlCode(UERR_IUS_INSERT_NONMFV_OVERFLOW)
                        << DgString0(smplGroup->colSet[0].colname->data());
            }
          LM->StopTimer();
          return UERR_IUS_INSERT_NONMFV_OVERFLOW;
       }

       // non-mfv value can not find a slot in CBF, record the failure and conintue
       if (insert_status == CountingBloomFilter::NO_SLOT ||
           insert_status == CountingBloomFilter::PARAM_ERROR)
          insertFailCount++;

 #if 0
       if ( LM->LogNeeded() ) {
          sprintf(buf, "key=%d, interval=%d", *(int*)valIter.dataRepPtr(), intervalIdx);
          LM->Log(buf);
       }
 #endif

       valIter.next();
     }

   smplGroup->allKeysInsertedIntoCBF = TRUE;

   if (LM->LogNeeded()) {
      LM->StopTimer();
      if (insertFailCount > 0) {
          sprintf(LM->msg, "For I, " PF64 " failures out of " PF64 " CBF insertions.",
                  insertFailCount, numInsRows - insGroup->nullCount);
          LM->Log(LM->msg);
      }
      logCBF("after inserting I", cbf);
   }

   //
   // end of Algorithm 2
   //

   //
   // Compute the new scale factor: the ratio of the RC of the table to
   // that of the final sample.
   //
   double scaleFactor =
        (double) actualRowCount / (sampleRowCount - numDelRows + numInsRows);


   if (LM->LogNeeded()) {
      sprintf(LM->msg, "actualRC=" PF64 " sampleRC=" PF64 " delRC= " PF64 ", InsRC=" PF64 ", scaleFactor=%f",
                actualRowCount, sampleRowCount, numDelRows, numInsRows, scaleFactor);
      LM->Log(LM->msg);
   }

   // nullCount is the scaled version of the number of inserted nulls minus the number
   // of deleted ones; this value, which could be negative, will be added to the
   // row count for the original null interval.
   Int32 nullCount = (Int32)((insGroup->nullCount - delGroup->nullCount) * scaleFactor) ;
   retcode = estimateAndTestIUSStats(smplGroup, delGroup, insGroup,
                                     hist, cbf, numNonNullIntervals,
                                     scaleFactor, nullCount, intvlRC);

   // If the adjusted histogram is judged worthy by estimateAndTestIUSStats, we next
   // tweak the highest and lowest interval boundaries. If the sample data produced a
   // value higher than the highest interval, we extend that interval's boundary to
   // include it. Similarly on the low end. We don't attempt to shrink the intervals,
   // though, if sample values were deleted. Instead we count on reversion to RUS
   // if the rowcounts of those intervals get too small.
   if ((retcode == 0) && (hiLowVal.seenAtLeastOneValue_))
     {
       T convertedBoundaryValue[1];  // a target for convertBoundaryOrMFVValue

       // highest interval -- the highest boundary is stored in
       // interval numNonNullIntervals

       // convert boundary to data type T
       const HSDataBuffer & hiBoundary = hist->getIntBoundary(numNonNullIntervals);
       convertBoundaryOrMFVValue(hiBoundary,
                                 smplGroup,
                                 0,
                                 convertedBoundaryValue,
                                 format);

       if (hiLowVal.hiValue_ > convertedBoundaryValue[0])
         {
           // convert T back to what would be stored in the histogram
           HSDataBuffer newHiBoundary;
           Lng32 convertRC = setBufferValue(hiLowVal.hiValue_,
                                            smplGroup,
                                            newHiBoundary);
           hist->setIntBoundary(numNonNullIntervals,newHiBoundary);
         }

       // lowest interval -- the low boundary is stored in interval 0

       // convert boundary to data type T
       const HSDataBuffer & lowBoundary = hist->getIntBoundary(0);
       convertBoundaryOrMFVValue(lowBoundary,
                                 smplGroup,
                                 0,
                                 convertedBoundaryValue,
                                 format);

       if (hiLowVal.lowValue_ < convertedBoundaryValue[0])
         {
           // convert T back to what would be stored in the histogram
           HSDataBuffer newLowBoundary;
           Lng32 convertRC = setBufferValue(hiLowVal.lowValue_,
                                            smplGroup,
                                            newLowBoundary);
           hist->setIntBoundary(0,newLowBoundary);
         }
     }

   if (intvlRC)
     NADELETEBASIC(intvlRC, STMTHEAP);
   return retcode;
 }

 Int32 HSGlobalsClass::logCBF(const char* title, CountingBloomFilter* cbf)
 {
   char buf1[30];
   char buf2[30];

   HSLogMan *LM = HSLogMan::Instance();

   sprintf(LM->msg, "====================================");
   LM->Log(LM->msg);
   LM->Log(title);

   cbf->outputParams(LM->msg);
   LM->Log(LM->msg);

   for ( UInt32 b = 1; b<cbf->numBuckets(); b++ ) {
       UInt64 rc  = cbf->totalFreq(b);
       convertInt64ToAscii(rc, buf1);

       UInt64 uec = cbf->uec(b);
       convertInt64ToAscii(uec, buf2);

       sprintf(LM->msg, "%5d | %s | %s\n",  b, buf1, buf2);
       LM->Log(LM->msg);

      VarUIntArray& lowf2s = cbf->lowF2s(b);
      for (UInt32 i=1; i < lowf2s.entries(); i++)
       {
         if (lowf2s[i] >0) {
           sprintf(LM->msg, "\tf%d=%d\n",
                  i, lowf2s[i]);
           LM->Log(LM->msg);
         }
       }

   }

    cbf->computeOverflowF2s();
    UInt32 highF2s = cbf->getOverflowEntries();
    sprintf(LM->msg, "\nHigh freq area:\n");
    LM->Log(LM->msg);

    for ( CollIndex i = 0 ; i < highF2s; i++) {
      UInt64 freq;
      UInt32 bucket;
      UInt64 f2 = cbf->highF2(i, freq, bucket);

      if ( freq > 0 ) {
         sprintf(LM->msg, "In bucket %d, f%d=%d\n",
                       bucket, (UInt32)freq, (UInt32)f2);
         LM->Log(LM->msg);
      }
    }


   sprintf(LM->msg, "====================================");

   LM->Log(LM->msg);

   sprintf(LM->msg, "====================================");
   LM->Log(LM->msg);
   return 0;
 }

 double HSGlobalsClass::computeAvgCharLengthForIUS(HSColGroupStruct* smplGroup,
                                                   HSColGroupStruct* delGroup,
                                                   HSColGroupStruct* insGroup)
 {
   Int64 delRows = iusSampleDeletedInMem->getNumRows();
   Int64 insRows = iusSampleInsertedInMem->getNumRows();

   // smplGroup->avgVarCharSize is the avg varchar size of the column represented
   // by smplGroup in the persistent sample, retrieved from the Histograms table
   // in selectIUSBatch(). The result of the current function replaces that value.
   Int64 oldSum = (Int64)(sampleRowCount * smplGroup->avgVarCharSize);
   Int64 newSum = (Int64)(oldSum - (delGroup->avgVarCharSize * delRows)
                          + (insGroup->avgVarCharSize * insRows));
   Int64 newRows = sampleRowCount - delRows + insRows;
   return (double)newSum / (double)newRows;
 }

 Int32 HSGlobalsClass::estimateAndTestIUSStats(HSColGroupStruct* group,
                                               HSColGroupStruct* delGroup,
                                               HSColGroupStruct* insGroup,
                                               HSHistogram* hist,
                                               CountingBloomFilter* cbf,
                                               Lng32 numNonNullIntervals,
                                               double scaleFactor,
                                               Int32 nullCount,
                                               Int64* intvlRC)
 {
   Int32 retcode = 0;


   ///////////////////////////////////////////////////
   // fetch uec and rowcount per interval from cbf.
   ///////////////////////////////////////////////////
   UInt64* sampledIntvlRCs = new(STMTHEAP) UInt64[cbf->numBuckets()];
   UInt64* sampledIntvlUECs = new(STMTHEAP) UInt64[cbf->numBuckets()];


   HSLogMan *LM = HSLogMan::Instance();

   if (LM->LogNeeded()) {
     sprintf(LM->msg, "IUS: estimateAndTestIUSStats() for column %s",
                      group->colSet[0].colname->data());
     LM->StartTimer(LM->msg);
     hist->logAll("The existing histogram is:");
     sprintf(LM->msg, "Total #intervals=%d, scaleFactor=%f,nullCount=%d",
                       numNonNullIntervals, scaleFactor, nullCount);
     LM->Log(LM->msg);

   }

   // low frequency area first, skip the lowest special interval
   for ( UInt32 b = 1; b<cbf->numBuckets(); b++ ) {

       // RC and UEC stats collected for each interval (bucket)
       sampledIntvlRCs[b] = cbf->totalFreq(b);
       sampledIntvlUECs[b] = cbf->uec(b);
   }


   ///////////////////////////////////////////////
   // estimate the uec per interval
   ///////////////////////////////////////////////
   //UInt64* estIntvlUECs = new(STMTHEAP) UInt64[numIntervals+1];
   double DshMax = CmpCommon::getDefaultNumeric(USTAT_DSHMAX);
   double coeffOfVar;
   double Duj;
   double Dsh;

   double totalAvgRC = 0;

   UInt64 totalUEC = 0;
   UInt64 totalRC = 0;


   // Populate fi for high frequency data
   cbf->computeOverflowF2s();

   if (LM->LogNeeded()) {
      sprintf(LM->msg, "#high freq entries in cbf=%d", cbf->getOverflowEntries());
      LM->Log(LM->msg);
   }

   HS_ASSERT(cbf->canHandleArbitrarySkewedValue() == FALSE);

   // save the total RC/UEC before during the processing of each of the interval
   // below, the interval RC and UEC will be updated.
   Int64 origTotalRC= 0;
   Int64 origTotalUEC = 0;
   hist->getTotalCounts(origTotalRC, origTotalUEC);

   double rcIntChangeThreshold =
     CmpCommon::getDefaultNumeric(USTAT_IUS_INTERVAL_ROWCOUNT_CHANGE_THRESHOLD);

   double uecIntChangeThreshold =
     CmpCommon::getDefaultNumeric(USTAT_IUS_INTERVAL_UEC_CHANGE_THRESHOLD);


   // skip the 0th interval, and go over all not-null intervals. idx is the
   // bucket number for CBF. Buckets are 0-index in CBF.
   //
   // If a shape test fails for an interval, we exit the following loop
   // immediately and return a result that causes the caller to bail out of IUS,
   // unless logging is enabled, in which case all intervals are processed
   // regardless of shape test failures. This allows us to view all problems in
   // the distribution of data after IUS when looking at the log. In this case,
   // the original shape test failure is remembered and used as the basis of
   // the return result and the information entered into the diagnostics area.
   USTAT_ERROR_CODES shapeTestError = UERR_NO_ERROR;
   char shapeFlags[10];
   for ( Lng32 idx = 1; idx<=numNonNullIntervals; idx++ ) {

       // This is an annotation for each interval in log file indicating which,
       // if any, shape tests failed for that interval.
       shapeFlags[0] = '\0';

       // get fi for i=1,2,...
       FrequencyCounts fi;


       // Populate fi for low frequent values first
       for ( UInt32 j = 1; j<cbf->lowF2s(idx).entries(); j++ ) {
          UInt32 f2 = cbf->lowF2s(idx)[j]; // fi=f2

          if ( f2 > 0 ) {

            if (LM->LogNeeded() && getenv("lf") ) {
                sprintf(LM->msg, "   low freq: f%d=%d", j, f2);
                LM->Log(LM->msg);
            }

            fi.increment((Int64)j, (ULng32)f2);

          }
       }

       UInt64 i = 0;
       UInt32 b = 0;

       // The trick to use 2*idx+k as the iterators is only possible with
       // CountingBloomFilterWithKnownSkews. The assertion at the beginning of
       // this method assures this.

       for ( UInt32 k=0; k<=1; k++ ) {

         UInt64 f2 = cbf->highF2(2*idx+k, i, b);
                                            // f2, i and b are fi, index (i in fi) and
                                            // the bucket# of the kth entry
                                            // in overflow area, respectively.
                                            // b === (2*idx+k)/2 === idx


         if ( i > 0 && f2 > 0 ) {
            sampledIntvlUECs[b]++;
            sampledIntvlRCs[b] += i;

            if (LM->LogNeeded() && getenv("hf") ) {
              sprintf(LM->msg, "   high freq: f%d=%d", (UInt32)i, (UInt32)f2);
              LM->Log(LM->msg);
            }

            fi.increment((Int64)i, (ULng32)f2);  // @WARN: Narrowing cast of 2nd argument


            if ( k == 0 )
               hist->setIntMFVRowCount(b, i);
            else
               hist->setIntMFV2RowCount(b, i);
         }

       }

       // Verify interval's row count accumulated by CBF.
       if (intvlRC && LM->LogNeeded())
         {
           if (intvlRC[idx] != sampledIntvlRCs[idx])
             {
               sprintf(LM->msg,
                       "*** Row count mismatch for interval %d: CBF=" PF64 ", count=" PF64 " ***",
                       idx, sampledIntvlRCs[idx], intvlRC[idx]);
               LM->Log(LM->msg);
             }
         }


       double oldRC = (double)hist->getIntRowCount(idx);
       double newEstRC = (double)(sampledIntvlRCs[idx] * scaleFactor);

       if ( 0 == sampledIntvlRCs[idx]) {
          if(shapeTestError == UERR_NO_ERROR)
             shapeTestError = UERR_WARNING_IUS_EMPTY_INTERVAL;
          if (LM->LogNeeded())
             strcat(shapeFlags, "e");
          else
            break;  // exit loop and return result
       }

       double estIntvlUEC ;

       // If sampled RC and UEC are the same, we set the newEstRC to the scaled up RC.
       if ( sampledIntvlUECs[idx] >= sampledIntvlRCs[idx] )
          estIntvlUEC = newEstRC;
       else
          estIntvlUEC = lwcUecEstimate(
                                 (double)sampledIntvlUECs[idx],// sampleUEC
                                 (double)sampledIntvlRCs[idx], // sampleRowCnt
                                 newEstRC,   // est total RC in the interval
                                 &fi,        // fi
                                 DshMax,     // input
                                 coeffOfVar, // output
                                 Duj,        // output
                                 Dsh         // output
                                 );


       double oldUec = (double)hist->getIntUec(idx);

       // Use the oldUec if estimatedUec is nan.  This is to work around
       // the nan value produced by lwcUecEstimate() above.

 #if __GNUC_MINOR__ == 8
       if ( std::isnan(estIntvlUEC) )
 #else
       if ( isnan(estIntvlUEC) )
 #endif
         estIntvlUEC = oldUec;

       // cap the new UEC with the RC
       if ( estIntvlUEC > newEstRC )
          estIntvlUEC = newEstRC;


       totalRC += (UInt64)newEstRC;
       totalUEC += (UInt64)estIntvlUEC;


       if ( estIntvlUEC== 0 ) {
          if(shapeTestError == UERR_NO_ERROR)
             shapeTestError = UERR_WARNING_IUS_ZERO_UEC_INTERVAL;
          if (LM->LogNeeded())
             strcat(shapeFlags, "z");
          else
             break;  // exit loop and return result
       }

       if ( oldUec > 2 ) {

         // Run the interval based shape test right here.

         //
         // Test that the row-count changes
         //
         if ( (newEstRC>oldRC) &&
              delta(oldRC, newEstRC)/oldRC > rcIntChangeThreshold ) {
            if(shapeTestError == UERR_NO_ERROR)
               shapeTestError = UERR_WARNING_IUS_TOO_MUCH_RC_CHANGE_INTERVAL;
            if (LM->LogNeeded())
               strcat(shapeFlags, "r");
            else
               break;  // exit loop and return result
         }


         //
         // Test UEC changes
         //

         if ( (estIntvlUEC > oldUec) &&
              delta(oldUec, estIntvlUEC)/oldUec > uecIntChangeThreshold ) {
            if(shapeTestError == UERR_NO_ERROR)
               shapeTestError = UERR_WARNING_IUS_TOO_MUCH_UEC_CHANGE_INTERVAL;
            if (LM->LogNeeded())
               strcat(shapeFlags, "u");
            else
               break;  // exit loop and return result
         }
       }

       if (LM->LogNeeded()) {
          sprintf(LM->msg,
            "%-5sAt intv[%d], RC(old, new)= (%f, %f); UEC(old, new)=(%f,%f), (coeffOfVar, Duj,Dsh)=(%f, %f,%f)",
            shapeFlags, idx,
            oldRC, newEstRC,
            oldUec, estIntvlUEC,
            coeffOfVar, Duj, Dsh );
          LM->Log(LM->msg);
       }

       // save the new RC and UEC for the interval
       hist->setIntRowCount(idx, (Int64)newEstRC);
       hist->setIntUec(idx, (Int64)estIntvlUEC);
   } // for each nonnull interval


   if (shapeTestError != UERR_NO_ERROR) {
      if (LM->LogNeeded()) {
         sprintf(LM->msg, "IUS could not be used for column %s due to failure of "
                           "one or more shape tests as indicated by symbols in "
                           "leftmost column in table above:\n"
                           "  e = empty interval\n"
                           "  z = zero UEC interval\n"
                           "  r = too much RC change\n"
                           "  u = too much UEC change",
                 group->colSet[0].colname->data());
         LM->Log(LM->msg);
         // only issue the warning diagnostic if logging is on
         diagsArea << DgSqlCode(shapeTestError)
                   << DgString0(group->colSet[0].colname->data());
         }
      LM->StopTimer();
      return shapeTestError;
      }

   if (LM->LogNeeded()) {
      sprintf(LM->msg, "IUS upgraded histogram for column %s is",
                  group->colSet[0].colname->data());
      group->groupHist->logAll(LM->msg);
   }


   //
   // Handle NULLs
   //
   if ( ! hist->hasNullInterval() && nullCount > 0 ) {
        hist->addNullInterval(nullCount, group->colCount);
        totalRC += nullCount;
        totalUEC++;
   } else if (hist->hasNullInterval()) {
 	 // Add number of nulls inserted minus the number deleted.
 	 // nullCount does not include original #nulls in sample.
      hist->addIntRowCount(hist->getNumIntervals(), nullCount);
      totalRC += hist->getIntRowCount(hist->getNumIntervals());
   }

   //
   // Run the total RC and UEC shape test.
   //

   //
   // Test the absolute total row-count percentage change.
   //
   double rcTotalChangeThreshold =
     CmpCommon::getDefaultNumeric(USTAT_IUS_TOTAL_ROWCOUNT_CHANGE_THRESHOLD);


   if ( (totalRC > (UInt64)origTotalRC) &&
       delta((UInt64)origTotalRC, totalRC)/origTotalRC > rcTotalChangeThreshold )  {
      if (LM->LogNeeded())
        {
          // only do the warning diagnostic if logging is enabled
          diagsArea << DgSqlCode(UERR_WARNING_IUS_TOO_MUCH_RC_CHANGE_TOTAL)
                    << DgString0(group->colSet[0].colname->data());
        }
      LM->StopTimer();
      return UERR_WARNING_IUS_TOO_MUCH_RC_CHANGE_TOTAL;
   }

   //
   // Test the absolute total UEC percentage change.
   //
   double uecTotalChangeThreshold =
     CmpCommon::getDefaultNumeric(USTAT_IUS_TOTAL_UEC_CHANGE_THRESHOLD);

   if ((totalUEC > (UInt64)origTotalUEC) &&
       delta((UInt64)origTotalUEC, totalUEC)/origTotalUEC > uecTotalChangeThreshold ) {
      if (LM->LogNeeded())
        {
          // only do the warning diagnostic if logging is enabled
          diagsArea << DgSqlCode(UERR_WARNING_IUS_TOO_MUCH_UEC_CHANGE_TOTAL)
                    << DgString0(group->colSet[0].colname->data());
        }
      LM->StopTimer();
      return UERR_WARNING_IUS_TOO_MUCH_UEC_CHANGE_TOTAL;
   }


   //
   // Handle avg varchar
   //
   if (group->computeAvgVarCharSize())
     group->avgVarCharSize = computeAvgCharLengthForIUS(group, delGroup, insGroup);
   else
     group->avgVarCharSize = -1;


   //
   //  Handle std. deviation of frequencies per interval
   //  by computing the sum of frequencies squared per interval.
   //  The actual std deviation of frequencies will be computed
   //  using the sum in method HSGlobalsClass::WriteStatistics().
   //
   double* sumSq = new (STMTHEAP) double[numNonNullIntervals+1];
   cbf->computeSumOfFrequencySquared(sumSq, numNonNullIntervals+1);

   // Copy out the sums to hist.
   for (Int32 i=1; i<=hist->getNumIntervals(); i++) {
      hist->setIntSquareSum(i, sumSq[i]);
   }

   NADELETEBASIC(sumSq, STMTHEAP);


   if (LM->LogNeeded()) {
      sprintf(LM->msg, "IUS: computed histogram for column %s is",
                   group->colSet[0].colname->data());
      hist->logAll(LM->msg);
      LM->StopTimer();
   }


   return retcode;
 }


 // For each group in PENDING state, we need to run RUS. Here we
 // merge the data from set S(i-1), D and I into one data stream
 // before applying the internal sort .
 Lng32 HSGlobalsClass::mergeDatasetsForIUS()
 {
    Int32 retcode = 0;

    HSColGroupStruct* group = singleGroup;
    HSColGroupStruct* delgroup = iusSampleDeletedInMem->getColumns();
    Int64 delrows = iusSampleDeletedInMem->getNumRows();
    HSColGroupStruct* insgroup = iusSampleInsertedInMem->getColumns();
    Int64 insrows = iusSampleInsertedInMem->getNumRows();
    while (group) {
      if (group->state==PENDING) {
         retcode = mergeDatasetsForIUS(group, sampleRowCount, delgroup, delrows, insgroup, insrows);
         HSHandleError(retcode);
      }
      group = group->next;
      insgroup = insgroup->next;
      delgroup = delgroup->next;
    }
    return retcode;
 }

 Lng32 HSGlobalsClass::mergeDatasetsForIUS(
                        HSColGroupStruct* smplGroup, Int64 smplrows,
                        HSColGroupStruct* delGroup, Int64 delrows,
                        HSColGroupStruct* insGroup, Int64 insrows)
 {
   Lng32 retcode = -1;
   Lng32 datatype = smplGroup->ISdatatype;

   // Only need to handle types used for IS/IUS. Datetime/interval types and
   // non-integral fixed numerics are all converted to one of these types.
   // Two template parameters are used by the templated overload of
   // mergeDatasetsForIUS(), because in the cases of char and varchar, it uses
   // both IS{Var|Fixed}Char and IUS{Fixed|Var}Char. For all other types, the
   // same type is used for both template parameters.
   switch (datatype)
     {
       case REC_BIN8_SIGNED:
         return mergeDatasetsForIUS((Int8*)smplGroup->data, (Int8*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_BOOLEAN:
       case REC_BIN8_UNSIGNED:
         return mergeDatasetsForIUS((UInt8*)smplGroup->data, (UInt8*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_BIN16_SIGNED:
         return mergeDatasetsForIUS((Int16*)smplGroup->data, (Int16*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_BPINT_UNSIGNED:
       case REC_BIN16_UNSIGNED:
         return mergeDatasetsForIUS((UInt16*)smplGroup->data, (UInt16*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_BIN32_SIGNED:
         return mergeDatasetsForIUS((Int32*)smplGroup->data, (Int32*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_BIN32_UNSIGNED:
         return mergeDatasetsForIUS((UInt32*)smplGroup->data, (UInt32*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_BIN64_SIGNED:
         return mergeDatasetsForIUS((Int64*)smplGroup->data, (Int64*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_BIN64_UNSIGNED:
         return mergeDatasetsForIUS((UInt64*)smplGroup->data, (UInt64*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_FLOAT32:
         return mergeDatasetsForIUS((Float32*)smplGroup->data, (Float32*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_FLOAT64:
         return mergeDatasetsForIUS((Float64*)smplGroup->data, (Float64*)NULL,
                                    smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         break;
       case REC_BYTE_F_ASCII:
       case REC_BYTE_F_DOUBLE:
         {
           // Create an object to be used with the value iterator; does not own its content.
           IUSFixedChar fixedChar(FALSE);
           IUSFixedChar::setLength(smplGroup->ISlength);
           IUSFixedChar::setCaseInsensitive(smplGroup->colSet[0].caseInsensitive == 1);
           IUSFixedChar::setColCollation(smplGroup->colSet[0].colCollation);
           IUSFixedChar::setCharSet(smplGroup->colSet[0].charset);
           return mergeDatasetsForIUS(&fixedChar, (ISFixedChar*)NULL,
                                      smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         }
         break;
       case REC_BYTE_V_ASCII:
       case REC_BYTE_V_DOUBLE:
         {
           // Create an object to be used with the value iterator; does not own its content.
           IUSVarChar varChar(FALSE);
           IUSVarChar::setDeclaredLength(smplGroup->ISlength);
           IUSVarChar::setCaseInsensitive(smplGroup->colSet[0].caseInsensitive == 1);
           IUSVarChar::setColCollation(smplGroup->colSet[0].colCollation);
           IUSVarChar::setCharSet(smplGroup->colSet[0].charset);
           return mergeDatasetsForIUS(&varChar, (ISVarChar*)NULL,
                                      smplGroup, smplrows, delGroup, delrows, insGroup, insrows);
         }
         break;

       default:
         retcode = -1;
         HSHandleError(retcode);
     } // switch

   return retcode;
 }

 // For char/varchar, two types are used, hence the two type parameters to this
 // template. The IUS subclasses of IS{Fixed|Var}Char are needed for the iterator
 // and for the arrays of boundary and MFV values, while the lighter weight
 // IS parent classes are used as the values in the data array.  For all other
 // types, the same type is used for both template parameters.
 template <class T_IUS, class T_IS>
 Int32 HSGlobalsClass::mergeDatasetsForIUS(T_IUS* ptr, T_IS* dummyPtr,
                        HSColGroupStruct* smplGroup, Int64 smplrows,
                        HSColGroupStruct* delGroup, Int64 delrows,
                        HSColGroupStruct* insGroup, Int64 insrows)
 {
   HSLogMan *LM = HSLogMan::Instance();
   HSHistogram* hist = smplGroup->groupHist;
   Lng32 numIntervals = hist->getNumIntervals();
   Lng32 numNonNullIntervals = hist->hasNullInterval()
                                     ? numIntervals - 1
                                     : numIntervals;


   // save the address of the source data
   void* source = smplGroup->data;

   // Kludge. allocate the memory at smplGroup->data and grab it as buffer.
   if ( smplGroup->allocateISMemory(smplrows + insrows, FALSE, TRUE) == FALSE )
      return -1;

   // the target buffer
   T_IS* buffer = (T_IS*)(smplGroup->data);

   // restore the source data
   smplGroup->data = source;

   T_IUS* boundaryValues = (T_IUS*)(smplGroup->boundaryValues);
   T_IUS* MFVValues = (T_IUS*)(smplGroup->MFVValues);


   // first work on smplGroup
   Lng32 ct = 0;

   Int32 intervalIdx;

   IUSValueIterator<T_IUS> valIter(ptr);

   // If there is only a null interval in the existing histogram, calculate the
   // null count for the updated histogram, and set up buffer and ct using I alone.
   //
   if (numNonNullIntervals == 0) {
      smplGroup->nullCount -= delGroup->nullCount;
      smplGroup->nullCount += insGroup->nullCount;
      buffer = (T_IS*)(insGroup->data);
      ct = insrows - insGroup->nullCount;

   } else if ( smplGroup->allKeysInsertedIntoCBF == FALSE ) {

      // Since we have not processed all keys from I, we have to clear the CBF
      // and reload it with (S(i-1) - D), and insert I into the buffer directly.
      smplGroup->cbf->clear();

      // Insert S(i-1)

      valIter.init(smplGroup);
      for (Lng32 i=0; i<smplrows; i++ ) {

          intervalIdx = findInterval(numNonNullIntervals, boundaryValues, valIter.val());

          CountingBloomFilter::INSERT_ENUM insert_status =
             smplGroup->cbf->insert((char*)valIter.dataRepPtr(), valIter.size(), intervalIdx,
                      (valIter.val() == MFVValues[intervalIdx]) ? cbf_key::MFV : cbf_key::NONE);


          valIter.next();
      }

      if (LM->LogNeeded()) {
        smplGroup->cbf->setLogFile((char*)(LM->logFileName()->data()));
        logCBF("MergeDatasets: after insert S(i-1)", smplGroup->cbf);
      }

      // Delete from D
      valIter.init(delGroup);
      for (Lng32 i=0; i<delrows; i++ ) {

          intervalIdx = findInterval(numNonNullIntervals, boundaryValues, valIter.val());

          smplGroup->cbf->remove((char*)valIter.dataRepPtr(), valIter.size(), intervalIdx,
                      (valIter.val() == MFVValues[intervalIdx]) ? cbf_key::MFV : cbf_key::NONE);

          valIter.next();
      }

      if (LM->LogNeeded()) {
        smplGroup->cbf->setLogFile((char*)(LM->logFileName()->data()));
        logCBF("MergeDatasets: after delete D", smplGroup->cbf);
      }

      // Now lookup the reconstructed CBF and insert (S(i-1) - D) into the buffer
      valIter.init(smplGroup);
      for (Lng32 i=0; i<smplrows; i++ ) {

          intervalIdx = findInterval(numNonNullIntervals, boundaryValues, valIter.val());

          // if the reference counter does not reach zero (cbf::remove() return true),
          // we need to save a copy of the data to the buffer. Otherwise, the data is
          // already deleted from CBF and should not be saved in the buffer.
          if ( smplGroup->cbf->remove((char*)valIter.dataRepPtr(), valIter.size(), intervalIdx,
                 (valIter.val() == MFVValues[intervalIdx]) ? cbf_key::MFV : cbf_key::NONE) )
          {
             buffer[ct++] = valIter.val();
          }

          valIter.next();
      }

      if (LM->LogNeeded()) {
        smplGroup->cbf->setLogFile((char*)(LM->logFileName()->data()));
        logCBF("MergeDatasets: after check cbf and insert S(i-1)-D", smplGroup->cbf);
      }

      // Finally insert all keys from I into the buffer
      valIter.init(insGroup);
      for (Lng32 i=0; i<insrows; i++ ) {
          buffer[ct++] = valIter.val();
          valIter.next();
      }

   } else {

     // All keys inserted case (i.e., IUS fails because of shape test failures)

      valIter.init(smplGroup);

      for (Lng32 i=0; i<smplrows; i++ ) {

          intervalIdx = findInterval(numNonNullIntervals, boundaryValues, valIter.val());

          // if the reference counter does not reach zero (cbf::remove() return true),
          // we need to save a copy of the data to the buffer. Otherwise, the data is
          // already deleted from CBF and should not be saved in the buffer.
          if ( smplGroup->cbf->remove((char*)valIter.dataRepPtr(), valIter.size(), intervalIdx,
                 (valIter.val() == MFVValues[intervalIdx]) ? cbf_key::MFV : cbf_key::NONE) )
          {
             buffer[ct++] = valIter.val();
          }

          valIter.next();
      }

      // Now work on insGroup
      valIter.init(insGroup);

      for (Lng32 i=0; i<insrows; i++ ) {

          intervalIdx = findInterval(numNonNullIntervals, boundaryValues, valIter.val());

          if ( smplGroup->cbf->remove((char*)valIter.dataRepPtr(), valIter.size(), intervalIdx,
                 (valIter.val() == MFVValues[intervalIdx]) ? cbf_key::MFV : cbf_key::NONE) )
          {
             buffer[ct++] = valIter.val();
          }

          valIter.next();
      }
   }


   // We can delete the old memory now. Pass FALSE so strData is preserved. The
   // string content is still referenced by the new objects in data.
   smplGroup->freeISMemory(FALSE);

   // Set the new buffer in place
   smplGroup->data = buffer;
   smplGroup->nextData = buffer + ct;

   // As a safeguard, mark the group as not having its string content (only
   // applicable for char types) laid out consecutively in the strData
   // buffer. This invalidates it for future use with an IUSValueIterator.
   smplGroup->strDataConsecutive = FALSE;

   // Done with these; delete them within this template function while we have
   // the type as a parameter.
   delete [] (T_IUS*)smplGroup->boundaryValues;
   delete [] (T_IUS*)smplGroup->MFVValues;
   smplGroup->boundaryValues = NULL;
   smplGroup->MFVValues = NULL;

   return 0;
 }

 // Format an integral value (which is a scaled value with implied decimal point)
 // as a fixed numeric value.
 void formatFixedNumeric(Int64 value, Lng32 scale, char* buffer)
 {
   char digits[] = "0123456789";
   char temp;
   char *p1 = buffer, *p2 = buffer;
   Int64 xval = (Int64)(value >=0 ? value : -value);  // no template for abs()
   Int32 numDigits = 0;

   // Write the digits out in reverse order, adding the decimal point at the
   // appropriate location.
   do
     {
       *p2++ = digits[xval % 10];
       if (++numDigits == scale)
         *p2++ = '.';
       xval /= 10;
     }
   while (xval > 0 || numDigits < scale);

   // Add sign if negative, then terminating null.
   if (value < 0)
     *p2++ = '-';
   *p2 = '\0';

   // Reverse the string: point to first and last chars, swap and move pointers
   // towards each other until they meet.
   p2--;
   while (p1 < p2)
     {
       temp = *p1;
       *p1++ = *p2;
       *p2-- = temp;
     }
 }


 // Called by setBufferValue() to format an interval boundary value for a
 // 64-bit float column.
 //
 Int32 copyValue(double &value, char *valueBuff, const HSColumnStruct &colDesc, short *len)
 {
   Int32 retcode = 0;  // status is good unless column does not have expected type
   char *ptr;

   if (colDesc.datatype == REC_IEEE_FLOAT64)
     {
       retcode = convFloat64ToAscii(valueBuff, SQL_DOUBLE_PRECISION_DISPLAY_SIZE, value,
                                    SQL_DOUBLE_PRECISION_FRAG_DIGITS, NULL, 0, false);
       ptr = valueBuff + SQL_DOUBLE_PRECISION_DISPLAY_SIZE - 1;
       while (*ptr == ' ')
         ptr--;
       *(ptr+1) = '\0';
     }
   else
     retcode = -1;
   return retcode;
 }

 // Called by setBufferValue() to format an interval boundary value for a
 // 32-bit float column.
 //
 Int32 copyValue(float &value, char *valueBuff, const HSColumnStruct &colDesc, short *len)
 {
   Int32 retcode = 0;  // status is good unless no case for type
   char *ptr;

   if (colDesc.datatype == REC_IEEE_FLOAT32)
     {
       retcode = convFloat64ToAscii(valueBuff, SQL_REAL_DISPLAY_SIZE, value,
                                    SQL_REAL_FRAG_DIGITS, NULL, 0, false);
       ptr = valueBuff + SQL_REAL_DISPLAY_SIZE - 1;
       while (*ptr == ' ')
         ptr--;
       *(ptr+1) = '\0';
     }
   else
     retcode = -1;

   return retcode;
 }

 // Called by setBufferValue() to format an interval boundary value for a
 // char column.
 //
 Int32 copyValue(ISFixedChar &value, char *valueBuff, const HSColumnStruct &colDesc, short *len)
 {
   char *ptr;
   NAWchar *wptr;
   Int32 retcode = 0;  // status is good unless no case for type
   const Lng32 maxCharBoundaryLen =
     (Lng32) CmpCommon::getDefaultNumeric(USTAT_MAX_CHAR_BOUNDARY_LEN);

   switch (colDesc.datatype)
     {
         case REC_BYTE_F_ASCII:
           *len = (short)MINOF(colDesc.length, maxCharBoundaryLen);
           memmove(valueBuff,
                  ((ISFixedChar*)((void*)&value))->getContent(), // make it work with template
                  *len);
           ptr = valueBuff + *len - 1;

           // Trim trailing spaces. This is done after possibly truncating the
           // string to maxCharBoundaryLen characters, which can result in
           // removing spaces that are embedded rather than trailing spaces in
           // the original (full) string. This is done to produce the same results
           // as the old (non-internal sort) code.
           while (*ptr == ' ' && ptr >= valueBuff)
             {
               ptr--;
               (*len)--;
             }
           break;

         case REC_BYTE_F_DOUBLE:
           *len = (short)MINOF(colDesc.length, maxCharBoundaryLen*2); // in bytes
           memmove(valueBuff,
                  ((ISFixedChar*)((void*)&value))->getContent(),
                  *len);
           wptr = (NAWchar*)valueBuff + (*len / 2) - 1;
           while (*wptr == L' ' && wptr >= (NAWchar*)valueBuff)  // trim trailing spaces
             {
               wptr--;
               *len -= 2;
             }
           break;

         default:
           retcode = -1;
           break;
     }

   return retcode;
 }

 // Called by setBufferValue() to format an interval boundary value for a
 // varchar column.
 //
 Int32 copyValue(ISVarChar &value, char *valueBuff, const HSColumnStruct &colDesc, short *len)
 {
   char *ptr;
   NAWchar *wptr;
   Int32 retcode = 0;  // status is good unless no case for type
   const Lng32 maxCharBoundaryLen =
     (Lng32) CmpCommon::getDefaultNumeric(USTAT_MAX_CHAR_BOUNDARY_LEN);

   switch (colDesc.datatype)
     {
         case REC_BYTE_V_ASCII:
           ptr = ((ISVarChar*)((void*)&value))->getContent(); // make it work with template
           *len = (short)MINOF(*(short*)ptr, maxCharBoundaryLen);
           memmove(valueBuff, ptr+sizeof(short), *len);
           ptr = valueBuff + *len - 1;
           while (*ptr == ' ' && ptr >= valueBuff)        // trim trailing spaces
             {
               ptr--;
               (*len)--;
             }
           break;

         case REC_BYTE_V_DOUBLE:
           ptr = ((ISVarChar*)((void*)&value))->getContent(); // make it work with template
           *len = (short)MINOF(*(short*)ptr, maxCharBoundaryLen*2); // in bytes
           memmove(valueBuff, ptr+sizeof(short), *len);
           wptr = (NAWchar*)valueBuff + (*len / 2) - 1;
           while (*wptr == L' ' && wptr >= (NAWchar*)valueBuff)        // trim trailing spaces
             {
               wptr--;
               *len -= 2;
             }
           break;

         default:
           retcode = -1;
           break;
     }

   return retcode;
 }

 // Called by setBufferValue() to format an interval boundary value for an
 // integral type, or any type mapped to an integer for internal sort.
 //
 Int32 copyValue(Int64 value, char *valueBuff, const HSColumnStruct &colDesc, short *len)
 {
     Int32 scaleFactor;  // 10**scale for fractional seconds
     Int32 fracSecPart;  // fractional part of second, scaled to integer
     Int64 intSecPart; // integral part of number of seconds
     char *ptr = NULL;
     Int32 retcode = 0;  // status is good unless no case for type

     if ((colDesc.datatype >= REC_MIN_BINARY &&
         colDesc.datatype <= REC_MAX_BINARY)||
         colDesc.datatype == REC_DECIMAL_LSE ||
         colDesc.datatype == REC_DECIMAL_UNSIGNED ||
         colDesc.datatype == REC_DECIMAL_LS)
       {
         // Faster than sprintf, works for scale>0, no platform-dependent
         // format specifier for Int64.
         formatFixedNumeric(value, colDesc.scale, valueBuff);
       }
     else
       {
        // The pre-handling of negative interval values is the same for all interval
        // types, so we do it once here before entering the switch statement.
        // FormatRow requires negative interval values to be in a particular
        // or an invalid boundary value will be produced.
        if (colDesc.datatype >= REC_MIN_INTERVAL && colDesc.datatype <= REC_MAX_INTERVAL)
          {
            ptr = valueBuff;
            if (value < 0)
              {
                // FormatRow requires minus sign before right-justified interval
                // value for negative interval.
                *ptr++ = '-';

                // No need to worry about overflow of 2's-complement min negative
                // value; int type chosen depends on the decimal leading field
                // precision, so there is plenty of wiggle room.
                value = -value;
              }
          }
        switch (colDesc.datatype)
        {
         case REC_DATETIME:
           switch (colDesc.precision)
             {
               case REC_DTCODE_DATE:
                 {
                   short year;
                   char month, day;
                   ExpDatetime::getYearMonthDay(value, year, month, day);
                   sprintf(valueBuff, "%04d-%02d-%02d", year, month, day);
                 }
                 break;

               case REC_DTCODE_TIME:
                 if (colDesc.scale > 0)
                   {
                     Int32 seconds = (Int32)(value / (Int32)pow(10, colDesc.scale));
                     sprintf(valueBuff, "%02d:%02d:%02d.%0*d",
                             seconds / 3600, (seconds % 3600) / 60, seconds % 60,
                             colDesc.scale, (Int32)(value % (Int32)pow(10, colDesc.scale)));
                   }
                 else
                   // value must be in int range; cast it so don't have to worry about
                   // platform-specific int64 format specifier.
                   sprintf(valueBuff, "%02d:%02d:%02d",
                           (Int32)value / 3600, ((Int32)value % 3600) / 60, (Int32)value % 60);
                 break;

               case REC_DTCODE_TIMESTAMP:
                 {
                  short dtvals[8];
                  INTERPRETTIMESTAMP(value, dtvals);
                  if (colDesc.scale > 0)
                    sprintf(valueBuff, "%04d-%02d-%02d %02d:%02d:%02d.%0*d",
                            dtvals[0],  // year
                            dtvals[1],  // month
                            dtvals[2],  // day
                            dtvals[3],  // hour
                            dtvals[4],  // minute
                            dtvals[5],  // seconds
                            colDesc.scale, // display width for fractional seconds
                            // Fractional second; compute microseconds, remove trailing
                            // zeroes beyond the scale.
                            (dtvals[6] * 1000 + dtvals[7]) / (Int32)pow(10, 6-colDesc.scale));
                  else
                    sprintf(valueBuff, "%04d-%02d-%02d %02d:%02d:%02d",
                                       dtvals[0], dtvals[1], dtvals[2],
                                       dtvals[3], dtvals[4], dtvals[5]);
                 }
                 break;

               default:
                 retcode = -1;
                 break;
             }
           break;

 // Unary minus used in several places in the following code for intervals,
 // which are always encoded as signed integers. The template instantiations for
 // unsigned types will complain about the attempted negation.

         // For single-field intervals, all we have to do is right-justify the
         // value in a field with width equal to the interval's precision.
         // FormatRow turns this into a valid literal of the specific interval type.
         case REC_INT_YEAR:
         case REC_INT_MONTH:
         case REC_INT_DAY:
         case REC_INT_HOUR:
         case REC_INT_MINUTE:
           // ptr has been set and adjustment made for negative values above.
           sprintf(ptr, PFV64, colDesc.precision, value);

         case REC_INT_SECOND:
           // ptr has been set and adjustment made for negative values above.
           if (colDesc.scale > 0)
             {
               scaleFactor = (Int32)pow(10, colDesc.scale);
               sprintf(ptr,
                       PFV64 "." PFLV64,
                       colDesc.precision,
                       value / scaleFactor,
                       colDesc.scale,
                       value % scaleFactor);
             }
           else
             sprintf(ptr, PFV64, colDesc.precision, value);
           break;

         case REC_INT_YEAR_MONTH:
           // ptr has been set and adjustment made for negative values above.
           sprintf(ptr, PFV64 "-%02d", colDesc.precision, value/12, (Int32)(value%12));
           break;

         case REC_INT_DAY_HOUR:
           // ptr has been set and adjustment made for negative values above.
           sprintf(ptr, PFV64 " %02d", colDesc.precision, value/24, (Int32)(value%24));
           break;

         case REC_INT_HOUR_MINUTE:
           // ptr has been set and adjustment made for negative values above.
           sprintf(ptr, PFV64 ":%02d", colDesc.precision, value/60, (Int32)(value%60));
           break;

         case REC_INT_DAY_MINUTE:
           // ptr has been set and adjustment made for negative values above.
           sprintf(ptr,
                   PFV64 " %02d:%02d",
                   colDesc.precision, value/(24*60), (Int32)(value%(24*60)/60), (Int32)(value%(24*60)%60));
           break;

         case REC_INT_MINUTE_SECOND:
           // ptr has been set and adjustment made for negative values above.
           if (colDesc.scale > 0)
             {
               scaleFactor = (Int32)pow(10, colDesc.scale);
               fracSecPart = (Int32)(value % scaleFactor);
               intSecPart = value / scaleFactor;
               sprintf(ptr,
                       PFV64 ":%02d.%0*d",
                       colDesc.precision, intSecPart / 60,
                       (Int32)(intSecPart % 60), colDesc.scale, fracSecPart);
             }
           else
             sprintf(ptr, PFV64 ":%02d", colDesc.precision, value/60, (Int32)(value%60));
           break;

         case REC_INT_HOUR_SECOND:
           // ptr has been set and adjustment made for negative values above.
           if (colDesc.scale > 0)
             {
               scaleFactor = (Int32)pow(10, colDesc.scale);
               fracSecPart = (Int32)(value % scaleFactor);
               intSecPart = (Int64)value / scaleFactor;
               sprintf(ptr,
                       PFV64 ":%02d:%02d.%0*d",
                       colDesc.precision,
                       intSecPart / 3600,               // hours
                       (Int32)(intSecPart % 3600 / 60), // minutes
                       (Int32)(intSecPart % 60),        // seconds
                       colDesc.scale,
                       fracSecPart);
             }
           else
             sprintf(ptr,
                     PFV64 ":%02d:%02d",
                     colDesc.precision, value / 3600,
                     (Int32)(value % 3600 / 60), (Int32)(value % 60));
           break;

         case REC_INT_DAY_SECOND:
           // ptr has been set and adjustment made for negative values above.
           if (colDesc.scale > 0)
             {
               scaleFactor = (Int32)pow(10, colDesc.scale);
               fracSecPart = (Int32)(value % scaleFactor);
               intSecPart = (Int64)value / scaleFactor;
               sprintf(ptr,
                       PFV64 " %02d:%02d:%02d.%0*d",
                       colDesc.precision,
                       intSecPart / 86400,         // days (86400 seconds=1 day)
                       (Int32)(intSecPart % 86400 / 3600),  // hours
                       (Int32)(intSecPart % 3600 / 60),     // minutes
                       (Int32)(intSecPart % 60),            // seconds
                       colDesc.scale,
                       fracSecPart);
             }
           else
             sprintf(ptr,
                     PFV64 " %02d:%02d:%02d",
                     colDesc.precision,
                     value / 86400,           // days
                     (Int32)(value % 86400 / 3600),    // hours
                     (Int32)(value % 3600 / 60),       // minutes
                     (Int32)(value % 60));             // seconds
           break;

         case REC_BOOLEAN:
           {
             if (value)
               strcpy(valueBuff,"TRUE");
             else
               strcpy(valueBuff,"FALSE");
           }
           break;


         default:
           retcode = -1;
           break;
        } // switch
       }  // else

   return retcode;
 }


 /***************************************************************************/
 /* METHOD:  setBufferValue()                                             */
 /* PURPOSE: Format the boundary value for the current interval as a        */
 /*          Unicode string. The boundary value is the max value represented*/
 /*          by the interval, except in the case of interval 0, in which    */
 /*          case it is the minimum value of interval 1. This function is   */
 /*          used only for internal sort.                                   */
 /* PARAMS:  value(in)    -- Boundary value in string form.                 */
 /*          group(in)    -- Group the histogram is for.                    */
 /*          boundary(out) -- HSDataBuffer object representing the boundary.*/
 /* RETCODE: 0 if successful, negative if error.                            */
 /***************************************************************************/
 template <class T>
 Lng32 setBufferValue(T& value,
                       const HSColGroupStruct *group,
                       HSDataBuffer &boundary)
   {
     HSLogMan *LM = HSLogMan::Instance();
     const HSColumnStruct &colDesc = group->colSet[0];
     Lng32 retcode = 0;
     char formatRowBuff[HS_MAX_BOUNDARY_LEN+10];
     char valueBuff    [HS_MAX_BOUNDARY_LEN+10];
     char *ptr;
     NAWchar *wptr;
     short *len = (short*)formatRowBuff;
     char *data = (char*)(formatRowBuff + sizeof(short));  // Offset of data pointer.

     // Copy the value, with any required formatting, into the buffer.
     Int32 rc=0;
     if ((rc = copyValue(value, valueBuff, colDesc, len)) < 0)
       {
         LM->Log("INTERNAL ERROR (copyValue):");
         sprintf(LM->msg, "Undefined datatype %d", colDesc.datatype);
         LM->Log(LM->msg);
         char errCode[20];
         sprintf(errCode, "%d", rc);
         *CmpCommon::diags() << DgSqlCode(-UERR_GENERIC_ERROR)
                             << DgString0("copyValue()")
                             << DgString1(errCode)
                             << DgString2(LM->msg);
         throw CmpInternalException("failure in copyValue()",
                                    __FILE__, __LINE__);
       }

     // *len has already been set for char types due to possibility of embedded
     // nulls, but will have to be adjusted here to len in bytes instead of chars.
     if (DFS2REC::isAnyCharacter(colDesc.datatype))
       {
         // Unicode char strings are already in the proper format and can just
         // be copied to the output buffer. For ascii strings, convert to their
         // Unicode equivalent.
         if (DFS2REC::isDoubleCharacter(colDesc.datatype))
           memmove(data, valueBuff, *len);
         else
           {
             wptr = (NAWchar*)data;
             ptr = valueBuff;
             while (ptr < valueBuff + *len)
               {
                 if (*ptr)
                   { *wptr++ = (NAWchar)(unsigned char)(*ptr); ptr++; }
                 else
                   {
                     *wptr++ = NAWchar('\0');
                     ptr++;
                   }
               }
             *len *= 2;  // len must be in bytes, not chars
           }
       }
     else
       {
         *len = strlen(valueBuff);
         na_mbstowcs((NAWchar*)data, valueBuff, *len);  // Len of destination string.
         *len *= 2;        // len must be in bytes, not chars
       }

     retcode = FormatRow(&colDesc, formatRowBuff, boundary);
     HSHandleError(retcode);
     return retcode;
   }

 /***************************************************************************/
 /* METHOD:  setBufferValue()                                             */
 /* PURPOSE: Template specialization that handles the non-internal sort     */
 /*          case, where the boundary value is already in the proper form   */
 /*          and just needs to be passed to FormatRow().                    */
 /* PARAMS:  value(in)    -- Boundary value in string form.                 */
 /*          group(in)    -- Group the histogram is for.                    */
 /*          boundary(out) -- HSDataBuffer object representing the boundary.*/
 /* RETCODE: 0 if successful, negative if error.                            */
 /***************************************************************************/
 Lng32 setBufferValue(myVarChar& value,
                       const HSColGroupStruct *group,
                       HSDataBuffer &boundary)
   {
     Lng32 retcode = 0;
     Lng32 colcount = group->colCount;
     if(group->skewedValuesCollected)
     {
       short lengthOfToken;
       Lng32 i = 0;
       HSDataBuffer tempBoundary(WIDE_(""));
       boundary = tempBoundary;
       HSDataBuffer comma(WIDE_(","));

       short remainingLength = value.len;
       char * tempStr  = (char *) &value;
       tempStr += sizeof(short);

       NABoolean isACharColumn= FALSE;
       NAWchar* ptrToContiguousSingleQuotes = NULL;
       char* copyOfValue = NULL;
       short noOfContiguousSingleQuotes = 0, tempLength = 0;

       short sizeOfNAWchar =
           sizeof(NAWchar);

       // na_wcswcs below expects the string to be null terminated
       // so we are using a temporary string that is a copy of
       // value and is one NAWchar longer with the last char set to
       // null
       copyOfValue = new (STMTHEAP) char[remainingLength+sizeOfNAWchar];
       memcpy(copyOfValue, tempStr, remainingLength);
       NAWchar* wptr = ((NAWchar*)copyOfValue) + ((remainingLength+sizeOfNAWchar)/sizeOfNAWchar) -1;
       *wptr = WIDE_('\0');
       tempStr = copyOfValue;

       NAWchar tempString [HS_MAX_UCS_BOUNDARY_CHAR];
       NAWchar *tempStringPtr = tempString;

       NAWchar * begin = (NAWchar *) tempStr;
       if(*begin == L'\'')
         isACharColumn = TRUE;
       NAWchar* end = NULL;

       if(isACharColumn)
       {
         begin++;
         end = na_wcschr(begin, L'\'');
         while(*(++end) == L'\'')
         {
           end++;
           end = na_wcschr(end, L'\'');
         }
         end--;
       }
       else
         end = na_wcschr(begin, L',');

       do
       {
         if(isACharColumn)
         {
           remainingLength -= sizeof(NAWchar);
           ptrToContiguousSingleQuotes = (NAWchar *)begin;
           while(ptrToContiguousSingleQuotes = na_wcswcs(ptrToContiguousSingleQuotes, WIDE_("''")))
           {
             noOfContiguousSingleQuotes++;
             tempLength = remainingLength - ((ptrToContiguousSingleQuotes - ((NAWchar *)begin)) + 1) * sizeof(NAWchar);
             memmove(ptrToContiguousSingleQuotes, (ptrToContiguousSingleQuotes + 1), tempLength);
             if(i == (colcount - 1))
               begin[(remainingLength-2)/2] = L'\0';
             ptrToContiguousSingleQuotes ++;
             if(end)
               end--;
           }
           remainingLength -= sizeof(NAWchar);
         }

 	if(i < colcount - 1)
 	  lengthOfToken = (end - begin) * sizeof(NAWchar);
 	else
 	  lengthOfToken = remainingLength - (noOfContiguousSingleQuotes*sizeof(NAWchar));

 	memcpy(tempStringPtr, &lengthOfToken , sizeof(short));
 	tempStringPtr ++;
 	na_wcsncpy (tempStringPtr, begin, (lengthOfToken /sizeof(NAWchar)));
 	tempStringPtr --;

 	retcode = FormatRow(&(group->colSet[i]), (char *)tempStringPtr, tempBoundary);
 	HSHandleError(retcode);
 	if(retcode)
 	  break;
 	else
 	{
 	  retcode = boundary.append(tempBoundary);
 	  if(retcode)
 	    break;
 	}

 	remainingLength -= (lengthOfToken + (noOfContiguousSingleQuotes*sizeof(NAWchar)));

 	if(end && (remainingLength > 0))
 	{
 	  retcode = boundary.append(comma);
 	  if(retcode)
 	    break;

 	  remainingLength -= sizeof(NAWchar);
       if(isACharColumn)
       {
         noOfContiguousSingleQuotes = 0;
         isACharColumn = FALSE;
         end++;
       }
 	  begin = end;
 	  begin ++;
       if(*begin == L'\'')
         isACharColumn = TRUE;
       if(isACharColumn)
       {
         begin++;
         end = na_wcschr(begin, L'\'');
         while(end && *(++end) == L'\'')
         {
           end++;
           end = na_wcschr(end, L'\'');
         }
         if(end)
           end--;
       }
       else
         end = na_wcschr(begin, L',');
     }
 	i++;
     }	while (i < colcount);

     if(copyOfValue)
       NADELETEBASIC(copyOfValue, STMTHEAP);
     }
     else
     {
       const HSColumnStruct &colDesc = group->colSet[0];
       retcode = FormatRow(&colDesc, (char*)&value, boundary);
       HSHandleError(retcode);
     }
     return retcode;
   }

 /***************************************************************************/
 /* METHOD:  setBufferValue()                                               */
 /* PURPOSE: Template specialization that handles the MC internal sort case */
 /*          to format the current MC interval boundary value as a Unicode  */
 /*          string.                                                        */
 /* PARAMS:  value(in)    -- Boundary value in string form.                 */
 /*          group(in)    -- Group the histogram is for.                    */
 /*          boundary(out) -- HSDataBuffer object representing the boundary.*/
 /* RETCODE: 0 if successful, negative if error.                            */
 /***************************************************************************/

 Lng32 setBufferValue(MCWrapper& value,
                      const HSColGroupStruct *mgroup,
                      HSDataBuffer &boundary)
 {
     Lng32 retcode = 0;
     Lng32 colcount = mgroup->colCount;
     HSDataBuffer comma(WIDE_(","));
     HSDataBuffer null(WIDE_("NULL"));

     NABoolean isNull = FALSE;

     HSLogMan *LM = HSLogMan::Instance();
     const HSColumnStruct &colDesc = mgroup->colSet[0];
     char formatRowBuff[HS_MAX_BOUNDARY_LEN+10];
     char valueBuff    [HS_MAX_BOUNDARY_LEN+10];
     char *ptr;
     NAWchar *wptr;
     short *len = (short*)formatRowBuff;
     char *data = (char*)(formatRowBuff + sizeof(short));  // Offset of data pointer.

     ISFixedChar isf;
     ISVarChar   isv;

     // Copy the value, with any required formatting, into the buffer.
     Int32 rc=0;
     for (Int32 i = 0; i < colcount; i++)
     {
        const HSColumnStruct &colDesc = mgroup->colSet[i];
        if (value.allCols_[i]->isNull(value.index_))
        {
           isNull = TRUE;
        }

        else
        {
           switch (value.allCols_[i]->ISdatatype)
           {
              case REC_BIN8_SIGNED:
                 retcode = copyValue(*((MCNonCharIterator<Int8>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                 break;
              case REC_BOOLEAN:
              case REC_BIN8_UNSIGNED:
                 retcode = copyValue(*((MCNonCharIterator<UInt8>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                 break;
              case REC_BIN16_SIGNED:
                 retcode = copyValue(*((MCNonCharIterator<short>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                 break;
              case REC_BIN16_UNSIGNED:
                 retcode = copyValue(*((MCNonCharIterator<unsigned short>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                 break;
              case REC_BIN32_SIGNED:
                 retcode = copyValue(*((MCNonCharIterator<Int32>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                 break;
              case REC_BIN32_UNSIGNED:
                 retcode = copyValue(*((MCNonCharIterator<UInt32>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                 break;
              case REC_BIN64_SIGNED:
                 retcode = copyValue(*((MCNonCharIterator<Int64>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                 break;
              case REC_BIN64_UNSIGNED:
                 retcode = copyValue(*((MCNonCharIterator<UInt64>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                 break;
              case REC_IEEE_FLOAT32:
                 retcode = copyValue(*((MCNonCharIterator<float>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                 break;
              case REC_IEEE_FLOAT64:
                 retcode = copyValue(*((MCNonCharIterator<double>*)(value.allCols_[i]))->getContent(value.index_), valueBuff, mgroup->colSet[i], len);
                break;
              case REC_BYTE_F_ASCII:
              case REC_BYTE_F_DOUBLE:
                 ((MCFixedCharIterator*)(value.allCols_[i]))->copyToISFixChar(isf, value.index_);
                 retcode = copyValue(isf, valueBuff, mgroup->colSet[i], len);
                break;
              case REC_BYTE_V_ASCII:
              case REC_BYTE_V_DOUBLE:
                 ((MCVarCharIterator*)(value.allCols_[i]))->copyToISVarChar(isv, value.index_);
                 retcode = copyValue(isv, valueBuff, mgroup->colSet[i], len);
                break;
              default:
                 retcode = -1;
                 break;
           }
        }

        if (retcode < 0)
        {
            LM->Log("INTERNAL ERROR (copyValue):");
            sprintf(LM->msg, "Undefined datatype %d", mgroup->colSet[i].datatype);
            LM->Log(LM->msg);
            char errCode[20];
            sprintf(errCode, "%d", rc);
            *CmpCommon::diags() << DgSqlCode(-UERR_GENERIC_ERROR)
                                << DgString0("copyValue()")
                                << DgString1(errCode)
                                << DgString2(LM->msg);
            throw CmpInternalException("failure in copyValue()", __FILE__, __LINE__);
        }
        else
        {
            if (i!=0)
               boundary.append(comma);

            if (!isNull)
            {
               // *len has already been set for char types due to possibility of embedded
               // nulls, but will have to be adjusted here to len in bytes instead of chars.
               if (DFS2REC::isAnyCharacter(colDesc.datatype))
               {
                   // Unicode char strings are already in the proper format and can just
                   // be copied to the output buffer. For ascii strings, convert to their
                   // Unicode equivalent.
                   if (DFS2REC::isDoubleCharacter(colDesc.datatype))
                     memmove(data, valueBuff, *len);
                   else
                   {
                       wptr = (NAWchar*)data;
                       ptr = valueBuff;
                       while (ptr < valueBuff + *len)
                       {
                           if (*ptr)
                           {
                              *wptr++ = (NAWchar)(unsigned char)(*ptr);
                               ptr++;
                           }
                           else
                           {
                              *wptr++ = NAWchar('\0');
                              ptr++;
                           }
                       }
                       *len *= 2;  // len must be in bytes, not chars
                   }
               }
               else
               {
                  *len = strlen(valueBuff);
                  na_mbstowcs((NAWchar*)data, valueBuff, *len);  // Len of destination string.
                  *len *= 2;        // len must be in bytes, not chars
               }

               HSDataBuffer tempBoundary;
               retcode = FormatRow(&colDesc, formatRowBuff, tempBoundary);
               HSHandleError(retcode)
               boundary.append(tempBoundary);
            }
            else
            {
               boundary.append(null);
               isNull = FALSE;
            }
        }
     }

     return retcode;
 }

 // The following 3 functions (getDigitCount(), convInt64ToAscii(), and
 // convFloat64ToAscii()) were borrowed from exp\exp_conv.cpp, and declared as
 // static to give them internal linkage and avoid link errors.

 static Lng32 getDigitCount(Int64 value)
   {
     static const Int64 decValue[] = {0,
                                9,
                                99,
                                999,
                                9999,
                                99999,
                                999999,
                                9999999,
                                99999999,
                                999999999,
                                9999999999LL,
                                99999999999LL,
                                999999999999LL,
                                9999999999999LL,
                                99999999999999LL,
                                999999999999999LL,
                                9999999999999999LL,
                                99999999999999999LL,
                                999999999999999999LL};

     for (Int32 i = 4; i <= 16; i += 4)
       if (value <= decValue[i]) {
 	if (value <= decValue[i-3])
 	  return(i-3);
 	if (value <= decValue[i-2])
 	  return(i-2);
 	if (value <= decValue[i-1])
 	  return(i-1);
 	else return i;
       }
     if (value <= decValue[17])
       return 17;
     if (value <= decValue[18])
       return 18;
     return 19;
   }

 //ex_expr::exp_return_type convInt64ToAscii(char *target,
 static short convInt64ToAscii(char *target,
 					  Lng32 targetLen,
 					  Int64 source,
 					  Lng32 scale,
 					  char * varCharLen,
 					  Lng32 varCharLenSize,
 					  char filler,
 					  NABoolean leadingSign,
                                           NABoolean leftPad,
 					  CollHeap *heap,
 					  ComDiagsArea** diagsArea) {

   Lng32 digitCnt = 0;
   NABoolean negative = (source < 0);
   NABoolean fixRightMost  = FALSE;  // True if need to fix the rightmost digit.

   Lng32 padLen = targetLen;
   Lng32 requiredDigits = 0;
   Lng32 leftMost;   // leftmost digit.
   Lng32 rightMost;  // rightmost digit.
   Lng32 sign = 0;

   //  Int64 newSource = (negative ? -source : source);
   Int64 newSource = 0;
   if ((negative) && (source == 0x8000000000000000LL)) // = -2 ** 63
     {
       newSource = 0x7fffffffffffffffLL;
       //             123456789012345
       digitCnt = 19;
       fixRightMost = TRUE;
     }
   else
     {
       newSource = (negative ? -source : source);
       digitCnt = getDigitCount(newSource);
     }

   if (leadingSign || negative) {
     sign = 1;
     padLen--;
   }
   // No truncation allowed.
   requiredDigits = digitCnt;
   // Add extra zero's.
   if (scale > requiredDigits)
     requiredDigits += (scale - requiredDigits);
   padLen -= requiredDigits;
   if (scale)
     padLen--; // decimal point
   if (padLen < 0) {
     // target string is not long enough - overflow
 //    ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
 //    return ex_expr::EXPR_ERROR;
       return 1;
   }

   if (varCharLenSize) {
     // we do not pad. Instead, we adjust the targetLen
     leftPad = FALSE;
     targetLen -= padLen;
     padLen = 0;
   };

   if (leftPad) {
     leftMost = padLen + sign;
   }
   else {
     leftMost = sign;
   }

   Lng32 currPos;
   // Add filler.
   rightMost = currPos = targetLen - 1;
   if (padLen) {
     Lng32 start;
     if (leftPad) { // Pad to the left.
       start = sign;
     }
     else { // Pad to the right
       start = targetLen - padLen;
       rightMost = currPos = start - 1;
     }
     str_pad(&target[start], padLen, filler);
   }

   // Convert the fraction part and add decimal point.
   if (scale) {
     Lng32 low = (currPos - scale);
     for (; currPos > low; currPos--) {
       target[currPos] = (char)(Int32)(newSource % 10) + '0';
       newSource /= 10;
     }
     target[currPos--] = '.';
   }

   // Convert the integer part.
   for (; currPos >= leftMost; currPos--) {
     target[currPos] = (char)(Int32)(newSource % 10) + '0';
     newSource /= 10;
   }

   // Add sign.
   if (leadingSign) {
     if (negative)
       target[0] = '-';
     else
       target[0] = '+';
   }
   else if (negative)
       target[currPos] = '-';

   // Fix the rightmost digit for -2 ** 63.
   if (fixRightMost && target[rightMost] == '7')
     target[rightMost] = '8';

   if (newSource != 0 || currPos < -1)
     { // Sanity check fails.
 //      ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW);
 //      return ex_expr::EXPR_ERROR;
       return 1;
     }

   // Set varchar length field for varchar.
   if (varCharLenSize)
     if (varCharLenSize == sizeof(Lng32))
       str_cpy_all(varCharLen, (char *) &targetLen, sizeof(Lng32));
     else {
       short VCLen = (short) targetLen;
       str_cpy_all(varCharLen, (char *) &VCLen, sizeof(short));
     };

 //  return ex_expr::EXPR_OK;
   return 0;
 };

 //////////////////////////////////////////////////////////////////
 // function to convert an FLOAT64  to an ASCII string
 // Trailing '\0' is not set!
 // This routine assumes that targetLen is at least
 // SQL_REAL_MIN_DISPLAY_SIZE:
 // 1 byte sign
 // 1 byte for digit in front of decimal point
 // 1 byte decimal point
 // 1 byte for at least one digit after decimal point
 // 5 bytes for exponent (E+DDD)
 ///////////////////////////////////////////////////////////////////
 static short convFloat64ToAscii(char *target,
 		 Lng32 targetLen,
 			double source,
 			// maximum # of fraction digits
 		 Lng32 digits,
 			char * varCharLen,
 		 Lng32 varCharLenSize,
 			NABoolean leftPad) {

   short err = 0;

   Lng32 displaySize = digits + 8; // Mantissa = digits + 3, E = 1, Exponent = 4
   HS_ASSERT(displaySize <= SQL_DOUBLE_PRECISION_DISPLAY_SIZE);
   char tempTarget[SQL_DOUBLE_PRECISION_DISPLAY_SIZE + 1];
   //char format[8];

   // the fraction has always between 1 and "digits" digits
   if ((targetLen - 8) < digits)
     digits = targetLen - 8;

   Lng32 usedTargetLen = MINOF(displaySize, targetLen);

   double absSource = source;
   NABoolean neg = FALSE;
   if (source < 0)
     {
       absSource = -source;
       neg = TRUE;
     }

   Int64 expon = 0;
   Int64 intMantissa = 0;
   NABoolean expPos = TRUE;
   if (absSource > 0)
     {
       double logTen = MathLog10(absSource, err);
       if (err)
 	return -1;

       if (logTen >= 0)
 	{
 	  expPos = TRUE;
 	}
       else
 	{
 	  logTen = -logTen;
 	  expPos = FALSE;
 	};

       while (logTen > 0)
 	{
 	  expon++;
 	  logTen -= 1;
 	}

       if ((expPos) && (logTen != 0))
 	expon--;

       NABoolean reduceExpon = FALSE;
       short reduceExponBy = 0;
       if (expon >= DBL_MAX_10_EXP)
 	{
 	  // if expon is greater than MAX exponent allowed, then reduce it
 	  // the diff between expon and DBL_MAX_10_EXP.
 	  // This is needed so the next MathPow call doesn't return
 	  // an error when it tries to do   10 ** DBL_MAX_10_EXP
 	  // (which will make it greater than the max double value).
 	  reduceExponBy = (short)(expon - DBL_MAX_10_EXP + 1);
 	  expon -= reduceExponBy;
 	  reduceExpon = TRUE;
 	}
       double TenPowerExpon =
 	((expPos == FALSE) ? MathPow(10.0, (double)expon, err)
                            : MathPow(10.0, (double)-expon, err));
       if (err)
 	return -1;

       double mantissa = absSource * TenPowerExpon;

       if (reduceExpon)
 	{
 	  // now fix mantissa by multiplying or dividing by 10.
 	  if (expPos == FALSE)
 	    mantissa = mantissa * MathPow(10.0, reduceExponBy, err);
 	  else
 	    mantissa = mantissa / MathPow(10.0, reduceExponBy, err);

 	  if (err)
 	    return -1;

 	  // and increase expon to its original value
 	  expon += reduceExponBy;
 	}


       intMantissa = (Int64)(mantissa * MathPow(10.0,(double)digits,err));

       if (err)
 	return -1;
     }

   short error;
   error =
     convInt64ToAscii(tempTarget, digits+3, (neg ? -intMantissa : intMantissa),
 		     digits, NULL, 0, ' ',
 		     neg, TRUE, NULL, NULL);
   if (error)
     return -1;

   if (intMantissa == 0)
     {
       // add a 0 before the decimal point of mantissa
       tempTarget[1] = '0';
     }

   tempTarget[digits+3] = 'E';
   error =
     convInt64ToAscii(&tempTarget[digits+4], 4, (expPos ? expon : -expon),
 		     0, NULL, 0, '0',
 		     TRUE, TRUE, NULL, NULL);
   if (error)
     return -1;

   char *pch = tempTarget;
   while (*pch == ' ') {
     usedTargetLen--;
     pch++;
   }

   if (varCharLenSize) {
     // the target is a varChar. Just move the data left adjusted and
     // set the length field.
     str_cpy_all(target, pch, usedTargetLen);

     if (varCharLenSize == sizeof(Lng32))
       str_cpy_all(varCharLen, (char *) &usedTargetLen, sizeof(Lng32));
     else {
       short VCLen = (short) usedTargetLen;
       str_cpy_all(varCharLen, (char *) &VCLen, sizeof(short));
     };
   }
   else {
     // if target is larger than usedTargetLen, fill in blanks.
     Lng32 padLen = targetLen - usedTargetLen;
     if (leftPad) {
       str_pad(target, padLen, ' ');
       str_cpy_all(&target[padLen], pch, usedTargetLen);
     }
     else {
       str_cpy_all(target, pch, usedTargetLen);
       str_pad(&target[usedTargetLen], padLen, ' ');
     }
   };
   return 0;
 }


 #ifdef _TEST_ALLOC_FAILURE
 // Sets up the array that indicates which memory allocation attempts for internal
 // sort are to be simulated as failures. If then nth allocation is to "fail",
 // then n will be an element of the array. The values to put in the array come
 // from the CQD COMP_STRING_5, which should have a sequence of numbers separated
 // by the '/' character.
 //
 void HSColGroupStruct::initFilter()
 {
   HSLogMan *LM = HSLogMan::Instance();
   char* cqdValue = (char*)ActiveSchemaDB()->getDefaults().getValue(COMP_STRING_5);
   char* filterString = new (STMTHEAP) char[strlen(cqdValue)+1];
   // Have to copy; strtok overwrites delims with nulls in stored cqd value.
   strcpy(filterString, cqdValue);
   char* val = strtok(filterString, "/");
   Int32 i = 1; // count will be stored in array elem 0
   while (val && i<MAX_FILTER_COUNT)
     {
       filterTargets[i++] = atoi(val);
       if (LM->LogNeeded())
         {
           sprintf(LM->msg, "Filter target %d = %d", i-1, filterTargets[i-1]);
           LM->Log(LM->msg);
         }
       val = strtok(NULL, "/");
     }
   filterTargets[0] = i-1;
   NADELETEBASIC(filterString, STMTHEAP);
 }

 // Filters memory allocation requests for internal sort, returning TRUE for
 // the ones that match an element of filterTargets, which results in NULL
 // being returned by the memory allocation routine. The parameter is the
 // number of allocation requests for internal sort so far. filterTargets is
 // a list of the requests, by ordinal position, that are to be simulated
 // failures.
 //
 // Parameters:
 //   count -- Number of memory allocation requests for internal sort overall
 //            for the current update stats statement, including the current
 //            allocation request.
 // Return value:
 //   TRUE if the allocation is to fail, FALSE otherwise.
 //
 NABoolean HSColGroupStruct::allocFilter(Lng32 count)
 {
   for (Int32 i=1; i<=filterTargets[0]; i++)
     {
       if (count == filterTargets[i])
         return TRUE;
     }

   return FALSE;
 }
 #endif  /* _TEST_ALLOC_FAILURE */


 /****************************************************************************/
 /* METHOD:  histIsObsolete()                                                */
 /* PURPOSE: Determine whether or not a histogram is to be considered        */
 /*          obsolete. If either the total number of inserts/deletes/updates */
 /*          since the last stats generation, or the difference in row       */
 /*          counts, is greater than a certain percentage of the row count   */
 /*          stored in the histogram (determined by the CQD                  */
 /*          USTAT_OBSOLETE_PERCENT_ROWCOUNT CQD), we consider it obsolete.  */
 /* RETCODE: TRUE if obsolete, FALSE otherwise.                              */
 /* PARAMS:                                                                  */
 /*   obsoletePercent(in) -- Percentage of the histogram row count that must */
 /*                          have changed for the histogram to be considered */
 /*                          obsolete.                                       */
 /*   histRowCount(in)    -- Row count stored with the histogram.            */
 /*   changedRowCount(in) -- Sum of inserts/deletes/updates since last time  */
 /*                          Update Statistics was run on the table.         */
 /****************************************************************************/
 NABoolean histIsObsolete(float obsoletePercent,
                          Int64 histRowCount,
                          Int64 changedRowCount)
   {
     HSGlobalsClass *hs_globals = GetHSContext();
     Int64 obsoleteThreshold = (Int64)(obsoletePercent * histRowCount);
     Int64 rowCountDiff = hs_globals->actualRowCount - histRowCount;
     if (rowCountDiff < 0)
       rowCountDiff = -rowCountDiff;  // No abs fn for Int64
     return MAXOF(changedRowCount, rowCountDiff) > obsoleteThreshold;
   }

 /****************************************************************/
 /* METHOD:  AddNecessaryColumns()                               */
 /* PURPOSE: Determine histograms in need of update by finding   */
 /*          those that have been requested by the optimizer but */
 /*          were not available, or had abbreviated stats added  */
 /*          on the fly by the optimizer.                        */
 /* RETCODE:  0 - successful                                     */
 /*          -1 - failure                                        */
 /* PARAMS:  none                                                */
 /****************************************************************/
 Lng32 AddNecessaryColumns()
   {
     HSLogMan *LM = HSLogMan::Instance();
     HSGlobalsClass *hs_globals = GetHSContext();

     if (LM->LogNeeded())
       LM->Log("Determining NECESSARY columns");

     Lng32 retcode;
     HSColGroupStruct *group = NULL, *sgroup = NULL, *sgroup2 = NULL, *prevGroup = NULL;
     HSColumnStruct col;
     NAString columnName;
     char tempStr[30];
     Int64 objID = hs_globals->objDef->getObjectUID(); // TABLE_UID
     Lng32 maxMCWidthForAutomation = CmpCommon::getDefaultLong(USTAT_AUTO_MC_MAX_WIDTH);
     // Columns read from Histograms table.
     ULng32 histid;
     Lng32 colPos;
     Lng32 colNum;
     Lng32 colCount;
     char reason;

     // Keep track of info on single-column groups for processing multi-column
     // groups; READ_TIME, etc. columns are not maintained for the MC groups.
     struct SingleColStatus
       {
         SingleColStatus()
           : recentlyRead(FALSE), obsolete(FALSE)
           {}
         NABoolean recentlyRead;
         NABoolean obsolete;
       };

     SingleColStatus *singleCols
           = new (STMTHEAP) SingleColStatus[hs_globals->objDef->getNumCols()+1];  // +1 bec. 0 not used

     // Max read age must be at least twice as long as the automation interval.
     Lng32 maxReadMinutes = CmpCommon::getDefaultLong(USTAT_MAX_READ_AGE_IN_MIN);
     if (maxReadMinutes < 2 * HSGlobalsClass::autoInterval)
       maxReadMinutes = 2 *  HSGlobalsClass::autoInterval;

     // Calculate oldest allowed READ_TIME in GMT.
     char maxGMTTimeStr[HS_TIMESTAMP_SIZE];
     Int64 curSecs    = hs_getEpochTime();                     // Get current time in secs.
     Int64 oldestSecs = curSecs - (Int64) (maxReadMinutes*60); // Subtract max read time.
     hs_formatTimestamp(oldestSecs, maxGMTTimeStr);     // Convert to GMT and timestamp string.

     if (LM->LogNeeded())
       {
         sprintf(LM->msg, "\tLooking for histograms read in the last %d minutes.",
                          maxReadMinutes);
         LM->Log(LM->msg);
       }

     // Start a transaction if one not already in progress. The query used to
     // get the current histograms would implicitly start one otherwise, and
     // we would finish the update stats statement still in a transaction that
     // the user did not start. The HSTranController object will either commit
     // or abort the transaction in its dtor on function exit. It must be
     // declared BEFORE the HSErrorCatcher object, so the error catcher dtor
     // is invoked first (reverse order of construction). Otherwise, the execution
     // of the rollback in an error case will cause the CLI diagnostics area to be
     // cleared before the CLI errors can be merged with our own diagnostics area
     // by the HSErrorCatcher dtor.
     //
     HSTranController TC("GET GROUP LIST FOR NECESSARY", &retcode);
     HSErrorCatcher errorCatcher(retcode, -UERR_INTERNAL_ERROR, "AddNecessaryColumns", TRUE);

 #ifdef NA_USTAT_USE_STATIC  // use static query defined in module file
     // Note that the output list of the query used in the SQ module file no
     // longer matches the items selected by the new dynamic version of the query.
     HSCliStatement necStmt(HSCliStatement::CURSOR105_MX_2300,
                            (char *)hs_globals->hstogram_table->data(),
                            (char *)&objID,
                            (char *)&maxGMTTimeStr);
 #else // NA_USTAT_USE_STATIC not defined, use dynamic query
     char sbuf[25];
     NAString qry = "SELECT HISTOGRAM_ID, COL_POSITION, COLUMN_NUMBER, COLCOUNT, REASON "
                    "FROM ";
     qry.append(hs_globals->hstogram_table->data());
     qry.append(    " WHERE TABLE_UID=");
     snprintf(sbuf, sizeof(sbuf), PF64, objID);
     qry.append(sbuf);
     qry.append(      " AND (REASON='S'");
     qry.append(       " OR  REASON=' ')");
     qry.append(    " ORDER BY COLCOUNT, HISTOGRAM_ID, COL_POSITION");
     qry.append(    " FOR READ COMMITTED ACCESS");

     HSCursor necStmt(STMTHEAP, "HS_NECESSARY_COLS_STMT");
     retcode = necStmt.prepareQuery(qry.data(), 0, 5);
     HSLogError(retcode);
     if (retcode < 0)
       {
         if (LM->LogNeeded())
           LM->Log("Failed to prepare query to fetch missing stats data from histograms table");
         errorCatcher.setString1(" in call to prepareQuery()");
         return retcode;
       }
 #endif
     retcode = necStmt.open();
     if (retcode < 0)
       {
         if (LM->LogNeeded())
           LM->Log("Failed to open cursor for query to fetch missing stats data from histograms table");
         errorCatcher.setString1(" in call to open()");
         return retcode;
       }

     colCount = 1;                       // just to satisfy loop initially
     hs_globals->allMissingStats = TRUE; // Initialize flag.

     // First read through the single-column groups. The query for the cursor
     // we're using sorts by COLCOUNT, so the singles will come first. We need
     // their info before we can process the multicolumn groups.
     while (retcode == 0 && colCount == 1)
       {
         retcode = necStmt.fetch(5,
                                (void *)&histid, (void *)&colPos,
                                (void *)&colNum, (void *)&colCount,
                                (void *)&reason
                               );

         if (retcode || colCount > 1)  // end of data, error, or end of single-col groups
           break;

         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "\tFound column %s", hs_globals->objDef->getColName(colNum));
             LM->Log(LM->msg);
           }
         if (ColumnExists(colNum))     // this column already explicitly requested
           {
             if (reason == HS_REASON_EMPTY && prevGroup) {
               sprintf(tempStr, " %u,", histid);
               prevGroup->oldHistidList += tempStr;
               if (LM->LogNeeded())
                 LM->Log("\t(duplicate empty histogram)");
             }
             else if (LM->LogNeeded())
               LM->Log("\t\t(already explicitly requested)");
             continue;
           }

         singleCols[colNum].recentlyRead = TRUE;  // query selects only these

         // Set up a group containing this column, add it to the HSGlobalsClass
         // single-col list, and allocate a new group for the next one.
         HS_ASSERT(colCount == 1); // some of the following code assumes this
         group = new(STMTHEAP) HSColGroupStruct;
         col = hs_globals->objDef->getColInfo(colNum); // colNum = position in table
         col.colnum = colNum;
         col.position = colPos;
         columnName = hs_globals->objDef->getColName(colNum);
         group->colSet.insert(col);
         group->oldHistid = histid;
         group->colCount = colCount;
         HS_ASSERT(reason == HS_REASON_EMPTY ||
                   reason == HS_REASON_SMALL_SAMPLE);
         group->newReason = HS_REASON_AUTO_INIT;
         *group->colNames += ToAnsiIdentifier(columnName);
         hs_globals->addGroup(group);
         prevGroup = group;

         // If reason for this single column histogram is not empty or small-stats,
         // all of the histograms being generated are NOT for missing statistics.
         // (If all single column histograms ARE for missing statistics, then
         // all histograms are.)  This flag is used to determine whether table
         // rowcounts should be reset.
         // (Note: the condition will never be true with the current code; it was
         //  left in place so it isn't forgotten if we add an obsolescence
         //  criterion for automation).
         if (reason != HS_REASON_EMPTY && reason != HS_REASON_SMALL_SAMPLE)
           hs_globals->allMissingStats = FALSE;
       }

     if (LM->LogNeeded() && (hs_globals->optFlags & SAMPLE_REQUESTED))
       LM->Log("Using sampling specified by user.");

     // Next read the multicolumn groups. Upon entry, the first column
     // of the first MC group has been read (unless retcode != 0).
     NABoolean skipToNextHist;
     NABoolean rowReadByPreviousLoop = TRUE;
     while (retcode == 0)
       {
         // Get the next row, except on the first iteration when we already have
         // the one read by the loop above. Query returns rows ordered by
         // colcount/histid/colpos, so MC columns are read in correct order.
         if (rowReadByPreviousLoop)
           {
             HS_ASSERT(colPos == 0);
             rowReadByPreviousLoop = FALSE;  // need to read a new one next time
           }
         else
           {
             retcode = necStmt.fetch(5,
                                   (void *)&histid, (void *)&colPos,
                                   (void *)&colNum, (void *)&colCount,
                                   (void *)&reason
                                   );
             if (retcode)  // end of data or error
               break;
           }

         // Since the results are ordered by column position within histogram
         // id, we know we're starting a new group when colPos is 0.
         // All rows representing the MC will have the same reason and colcount
         // values, so we can tell whether the MC should be included in the
         // necessary histograms by looking at the first row. If it is to be
         // included, create the group and set the values for it that are not
         // column-specific. Below, the component columns of the MC are added
         // to the group, one per iteration of this loop.
         if (colPos == 0)
           {
             // Skip if not an empty histogram or if # cols in the histogram is
             // larger than that allowed for automation. An MC histogram will
             // never have a reason of HS_REASON_SMALL_SAMPLE.
             skipToNextHist = (reason != HS_REASON_EMPTY || colCount > maxMCWidthForAutomation);
             if (!skipToNextHist)
               {
                 group = new(STMTHEAP) HSColGroupStruct;
                 group->oldHistid = histid;
                 group->colCount = colCount;
                 HS_ASSERT(reason == HS_REASON_EMPTY);
                 group->newReason = HS_REASON_AUTO_INIT;
               }
           }

         // For any colPos, avoid rest of loop if this MC is not necessary.
         // This ignores all rows for the columns that make up the MC.
         // skipToNextHist will be assigned a new value on the next iteration
         // in which colPos==0, which marks the beginning of the set of rows
         // for the next MC.
         if (skipToNextHist)
           continue;

         // Add this column to the group.
         col = hs_globals->objDef->getColInfo(colNum); // colNum is position in table
         col.colnum = colNum;
         col.position = colPos;
         group->colSet.insert(col);
         columnName = hs_globals->objDef->getColName(colNum);
         *group->colNames += ToAnsiIdentifier(columnName);
         if (colPos < colCount - 1)
           *group->colNames += ", ";

         // Make sure that a single column group exists for this column.  If not,
         // create it.  This can occur if an MC histogram is empty, but the corresponding
         // single column histograms are not. Note that MC stats don't have a reason of
         // HS_REASON_SMALL_SAMPLE.
         if (hs_globals->findGroup(colNum) == NULL)
           {
             sgroup2 = new(STMTHEAP) HSColGroupStruct;
             if (sgroup)
               // Link single col groups so they can be added later. Note that
               // this is a partially-constructed link; we can only traverse
               // from the end of the list to the front.
               sgroup2->prev = sgroup;
             sgroup = sgroup2;
             col = hs_globals->objDef->getColInfo(colNum); // colNum = position in table
             col.colnum = colNum;
             col.position = 0;
             sgroup->colSet.insert(col);
             sgroup->oldHistid = 0;
             sgroup->colCount = 1;
             HS_ASSERT(reason == HS_REASON_EMPTY);
             group->newReason = HS_REASON_AUTO_INIT;
             *sgroup->colNames += ToAnsiIdentifier(columnName);
           }

         // If complete, check to see if it is a duplicate.  If not, add the group
         // to the multicolumn list of HSGlobalsClass and allocate a new group.
         if (colPos == colCount - 1)
           {
             // Check to see if this is a duplicate group.
             prevGroup = hs_globals->findGroup(group);
             if (!prevGroup)
               {
                 hs_globals->addGroup(group);
                 while (sgroup) // Add the list of single col groups if necessary.
                   {
                     sgroup2 = sgroup->prev;
                     sgroup->prev = 0; // Zero out 'prev' before adding group.
                     hs_globals->addGroup(sgroup);
                     sgroup = sgroup2;
                   }
               }
             else
               {
                 // Duplicate, save histid to remove it later.  Delete MC group.
                 // No need to delete sgroup, none will be created for an MC duplicate.
                 sprintf(tempStr, " %u,", histid);
                 prevGroup->oldHistidList += tempStr;
                 delete group;
               }
           }
       }

     // Preserve failure retcode from fetch for return value.
     if (retcode < 0)
       {
         errorCatcher.setString1(" in call to fetch()");
         errorCatcher.finalize();  // before call to close() erases CLI diags area
         necStmt.close();
         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "AddNecessaryColumns: A fetch returned %d", retcode);
             LM->Log(LM->msg);
           }
       }
     else
       {
         retcode = necStmt.close();
         if (retcode < 0)
           {
             errorCatcher.setString1(" in call to close()");
             if (LM->LogNeeded())
               {
                 sprintf(LM->msg, "AddNecessaryColumns: close() returned %d", retcode);
                 LM->Log(LM->msg);
               }
           }
       }

     HSHandleError(retcode);

     // Use default sampling unless it was specified by the user.
     if (!(hs_globals->optFlags & SAMPLE_REQUESTED))
       {
         hs_globals->optFlags |= SAMPLE_BASIC_0;  // use default
         if (LM->LogNeeded())
           {
             // Preserve the text used in this log message; it is searched for
             // by certain ustat regression tests.
             LM->Log("Sampling for NECESSARY: default used");
           }
       }

     return retcode;
   }

 /****************************************************************/
 /* METHOD:  AddAllColumnsForIUS()                               */
 /* PURPOSE: Read in all histograms for the table                */
 /*                                                              */
 /* RETCODE:  0 - successful                                     */
 /*          -1 - failure                                        */
 /* PARAMS:  none                                                */
 /****************************************************************/
 Lng32 AddAllColumnsForIUS()
   {
     HSLogMan *LM = HSLogMan::Instance();
     HSTranMan *TM = HSTranMan::Instance();
     HSGlobalsClass *hs_globals = GetHSContext();

     if (LM->LogNeeded())
       {
         LM->Log("Determining NECESSARY columns");
       }

     Lng32 retcode;
     HSColGroupStruct *group = NULL, *sgroup = NULL, *sgroup2 = NULL, *prevGroup = NULL;
     HSColumnStruct col;
     NAString columnName;
     char tempStr[30];
     Int64 objID = hs_globals->objDef->getObjectUID(); // TABLE_UID
     double samplePercent;               // Sample percent for a previously generated hist.
     // Columns read from Histograms table.
     ULng32 histid;
     Lng32 colPos;
     Lng32 colNum;
     Lng32 colCount;
     Int64 rowCount;
     short samplePercentX100;  // Stored value is sample % * 100.
     double cv;
     char reason;

     // Keep track of info on single-column groups for processing multi-column
     // groups; READ_TIME, etc. columns are not maintained for the MC groups.
     struct SingleColStatus
       {
         SingleColStatus()
           : recentlyRead(FALSE), obsolete(FALSE)
           {}
         NABoolean recentlyRead;
         NABoolean obsolete;
       };

     SingleColStatus *singleCols
           = new (STMTHEAP) SingleColStatus[hs_globals->objDef->getNumCols()+1];  // +1 bec. 0 not used

     char baseGMTTimeStr[HS_TIMESTAMP_SIZE];
     Int64 oldestSecs = hs_getBaseTime();                      // Get the base time
     hs_formatTimestamp(oldestSecs, baseGMTTimeStr);            // Convert to GMT and timestamp string.

     // Start a transaction if one not already in progress. The query used to
     // get the current histograms would implicitly start one otherwise, and
     // we would finish the update stats statement still in a transaction that
     // the user did not start.
     //
     NABoolean startedTrans = ((TM->Begin("GET GROUP LIST FOR NECESSARY") == 0) ? TRUE : FALSE);

     HSCliStatement necStmt(HSCliStatement::CURSOR105_MX_2300,
                            (char *)hs_globals->hstogram_table->data(),
                            (char *)&objID,
                            (char *)&baseGMTTimeStr);

     retcode = necStmt.open();
     if (retcode < 0)
       {
         if (LM->LogNeeded())
           LM->Log("Failed to open HSCliStatement for CURSOR105_MX_2300");
         if (startedTrans)
           TM->Rollback();
         return retcode;
       }

     colCount = 1;                       // just to satisfy loop initially
     hs_globals->allMissingStats = TRUE; // Initialize flag.

     // First read through the single-column groups. The query for the cursor
     // we're using sorts by COLCOUNT, so the singles will come first. We need
     // their info before we can process the multicolumn groups.
     while (retcode == 0 && colCount == 1)
       {
         retcode = necStmt.fetch(8,
                                (void *)&histid, (void *)&colPos,
                                (void *)&colNum, (void *)&colCount,
                                (void *)&rowCount, (void *)&samplePercentX100,
                                (void *)&cv, (void *)&reason
                               );

         if (retcode || colCount > 1)  // end of data, error, or end of single-col groups
           break;
         samplePercent = ((float) samplePercentX100) / 100; // Stored value is % * 100.

         if (LM->LogNeeded())
           {
             sprintf(LM->msg, "\tFound column %s", hs_globals->objDef->getColName(colNum));
             LM->Log(LM->msg);
           }

         // If there is an existing histogram which is not obsolete, ignore the
         // current column.
         singleCols[colNum].recentlyRead = TRUE;  // query selects only these
         singleCols[colNum].obsolete = FALSE;

         if (reason != HS_REASON_EMPTY && reason != HS_REASON_SMALL_SAMPLE) // IUSR revisit
           {
             if (LM->LogNeeded())
               LM->Log("\t\t(ignored; existing histogram not empty, or small stats)");
             continue;  // don't keep this group
           }

         // Set up a group containing this column, add it to the HSGlobalsClass
         // single-col list, and allocate a new group for the next one.
         HS_ASSERT(colCount == 1); // some of the following code assumes this
         group = new(STMTHEAP) HSColGroupStruct;
         col = hs_globals->objDef->getColInfo(colNum); // colNum = position in table
         col.colnum = colNum;
         col.position = colPos;
         columnName = hs_globals->objDef->getColName(colNum);
         group->colSet.insert(col);
         group->oldHistid = histid;
         group->colCount = colCount;
         group->newReason = ((reason == HS_REASON_EMPTY ||
                              reason == HS_REASON_SMALL_SAMPLE)
                                         ? HS_REASON_AUTO_INIT
                                         : HS_REASON_AUTO_REGEN);
         *group->colNames += ToAnsiIdentifier(columnName);
         hs_globals->addGroup(group);
         prevGroup = group;

         // IUSR revisit
         // This flag (set to FALSE) is used to determine whether table rowcounts should be reset.
         hs_globals->allMissingStats = FALSE;
       }

     // Next read the multicolumn groups. Upon entry, the first column
     // of the first MC group has been read (unless retcode != 0).
     NABoolean skipToNextHist = FALSE;
     NABoolean mcEmpty=FALSE, mcObsolete=FALSE, mcSizeTooBig=FALSE;
     NABoolean rowReadByPreviousLoop = TRUE;
     while (retcode == 0)
       {
         // Get the next row, except on the first iteration when we already have
         // the one read by the loop above. Query returns rows ordered by
         // colcount/histid/colpos, so MC columns are read in correct order.
         if (rowReadByPreviousLoop)
           rowReadByPreviousLoop = FALSE;  // need to read a new one next time
         else
           {
             retcode = necStmt.fetch(8,
                                   (void *)&histid, (void *)&colPos,
                                   (void *)&colNum, (void *)&colCount,
                                   (void *)&rowCount, (void *)&samplePercentX100,
                                   (void *)&cv, (void *)&reason
                                   );
             if (retcode)  // end of data or error
               break;
           }


         // Add this column to the group.
         if (colPos == 0)
           {
             group = new(STMTHEAP) HSColGroupStruct;
             group->oldHistid = histid;
             group->colCount = colCount;

             // IUSR revisit
             group->newReason = (reason == HS_REASON_EMPTY
                                         ? HS_REASON_AUTO_INIT
                                         : HS_REASON_AUTO_REGEN);
           }
         col = hs_globals->objDef->getColInfo(colNum); // colNum is position in table
         col.colnum = colNum;
         col.position = colPos;
         group->colSet.insert(col);
         columnName = hs_globals->objDef->getColName(colNum);
         *group->colNames += ToAnsiIdentifier(columnName);
         if (colPos < colCount - 1)
           *group->colNames += ", ";

         // Make sure that a single column group exists for this column.  If not,
         // create it.  This can occur if an MC histogram is empty, but the corresponding
         // single column histograms are not.  Note that MC stats don't have a reason of
         // HS_REASON_SMALL_SAMPLE.
         if (hs_globals->findGroup(colNum) == NULL)
           {
             sgroup2 = new(STMTHEAP) HSColGroupStruct;
             if (sgroup) sgroup2->prev = sgroup; // Link single col groups so they
                                                 // can be added later.
             sgroup = sgroup2;
             col = hs_globals->objDef->getColInfo(colNum); // colNum = position in table
             col.colnum = colNum;
             col.position = 0;
             sgroup->colSet.insert(col);
             sgroup->oldHistid = 0;
             sgroup->colCount = 1;

             // IUSR revisit
             sgroup->newReason = (reason == HS_REASON_EMPTY
                                         ? HS_REASON_AUTO_INIT
                                         : HS_REASON_AUTO_REGEN);
             *sgroup->colNames += ToAnsiIdentifier(columnName);
           }

         // If complete, check to see if it is a duplicate.  If not, add the group
         // to the multicolumn list of HSGlobalsClass and allocate a new group.
         if (colPos == colCount - 1)
           {
             // Check to see if this is a duplicate group.
             if (!(prevGroup = hs_globals->findGroup(group)))
             {
               hs_globals->addGroup(group);
               while (sgroup) // Add the list of single col groups if necessary.
                 {
                   sgroup2 = sgroup->prev;
                   sgroup->prev = 0; // Zero out 'prev' before adding group.
                   hs_globals->addGroup(sgroup);
                   sgroup = sgroup2;
                 }
             }
             else {
               // Duplicate, save histid to remove it later.  Delete MC group.
               // No need to delete sgroup, none will be created for an MC duplicate.
               sprintf(tempStr, " %u,", histid);
               prevGroup->oldHistidList += tempStr;
               delete group;
             }
             mcEmpty = mcObsolete = mcSizeTooBig = FALSE;
           }
       }

     retcode = necStmt.close();

     if (startedTrans)
       TM->Commit();    // Just ends the transaction; no changes made

     // We will use as the sampling percentage the max we judge is needed for
     // any column, unless the smallest of those previously manually generated
     // exceeds it. If there were no obsolete columns (only columns with no
     // existing histogram), use the default sampling parameters.
     //

     return retcode;
   }


 /**********************************************************************/
 /* METHOD:  doubleToHSDataBuffer(const double dbl, HSDataBuffer& dbf) */
 /* PURPOSE: Save a double precision number in HSDataBuffer            */
 /* RETCODE:  0 - successful                                           */
 /*          -1 - failure                                              */
 /* INPUT:   double dbl: the number to save                            */
 /*          HSDataBuffer dbf: where to save the number                */
 /**********************************************************************/


 Lng32 doubleToHSDataBuffer(const double dbl, HSDataBuffer& dbf)
   {
     char dvalue[SQL_DOUBLE_PRECISION_DISPLAY_SIZE+1]={0};
     char *ptr;
     Lng32 retcode = 0;
     retcode = convFloat64ToAscii((char *)dvalue,
       SQL_DOUBLE_PRECISION_DISPLAY_SIZE, dbl,
       SQL_DOUBLE_PRECISION_FRAG_DIGITS, NULL, 0, false);
     if (retcode != 0)
       return -1;
     ptr = dvalue + SQL_DOUBLE_PRECISION_DISPLAY_SIZE - 1;
     while (*ptr == ' ')
       ptr--;
     *(ptr+1) = '\0';

     static NAWchar val[SQL_DOUBLE_PRECISION_DISPLAY_SIZE+1];
     memset((char*)val, 0, sizeof(val));

     short s = strlen(dvalue);
     na_mbstowcs((NAWchar *)val, dvalue, s);
     dbf = (NAWchar *)val;
     return retcode;
   }

 /**********************************************************************/
 /* METHOD:  managePersistentSamples()                                 */
 /* PURPOSE: Create or delete persistent sample tables from update     */
 /*          statistics command line. These are NOT the automatically  */
 /*          managed persistent samples used by IUS.                   */
 /* RETCODE:  0 - successful                                           */
 /*          -1 - failure                                              */
 /**********************************************************************/
 Lng32 managePersistentSamples()
 {
   HSLogMan *LM = HSLogMan::Instance();
   Lng32 retcode = 0;
   HSGlobalsClass *hs_globals = GetHSContext();
   if (!hs_globals) retcode = -1;
   else
   {
     // initialize stats schema if our object is a Hive or native HBase table
     retcode = hs_globals->InitializeStatsSchema();
     HSHandleError(retcode);

     NAString table;
     Int64 sampleRows, tableRows;
     NABoolean isEstimate = FALSE;

     Int32 errorCode = 0;
     Int32 breadCrumb = 0;
     tableRows = hs_globals->objDef->getRowCount(isEstimate,
                                                 errorCode /* out */,
                                                 breadCrumb /* out */);
     if (errorCode)
       {
         *CmpCommon::diags() << DgSqlCode(-UERR_BAD_EST_ROWCOUNT) << DgInt0(errorCode) << DgInt1(breadCrumb);
         return -1;
       }

     // tableRows could be zero for a Trafodion or HBase table if the table is new
     // and all the data is still in memstore. So, in the logic below we dance around
     // that, attempting to supply a not unreasonable guess for tableRows in that case.
     // If we don't do this, then we later get a sampling ratio of -nan which will
     // cause a syntax error when we formulate the sampling query.

     if (hs_globals->optFlags & SAMPLE_BASIC_1)
       {
         sampleRows = hs_globals->sampleValue1;
         if (tableRows == 0)
           tableRows = sampleRows;  // just use the value the user gave
       }
     else if (hs_globals->optFlags & SAMPLE_RAND_1) // sampleValue1 is % * HS_SAMP_PCNT_UPSCALE. */
       {
         if (tableRows == 0)
           tableRows = 10000;  // just use a made-up number
         sampleRows = (Int64)(((double)hs_globals->sampleValue1/(HS_SAMP_PCNT_UPSCALE*100)) * tableRows);
       }
     else // hs_globals->optFlags & SAMPLE_ALL
       {
         if (tableRows == 0)
           tableRows = 10000;  // just use a made-up number
         sampleRows = tableRows/100; // use default sample size and then match all sample w/ large diff.
       }

     //Return -1 with error msg if sample rows greater than base table rows.
     if (sampleRows > tableRows)
     {
       HSFuncMergeDiags(- UERR_INVALID_OPTION,
                        "CREATE SAMPLE ROWS",
                        "a value less than or equal to base table rows");
       return -1;
     }

     HSPersSamples *sampleList = HSPersSamples::Instance(hs_globals->objDef->getCatName(),
                                                         hs_globals->objDef->getSchemaName());
     if (!sampleList) retcode = -1;
     else
     {
       if (hs_globals->optFlags & CREATE_SAMPLE_OPT)  /* create sample requested*/
       {
         if (sampleList->createAndInsert(hs_globals->objDef, table,
                                         sampleRows, tableRows,
                                         isEstimate,
                                         'M'))  // manually created persistent sample table
           retcode = -1;
         if (LM->LogNeeded())
           {
             char intStr[30];
             convertInt64ToAscii(sampleRows, intStr);
             sprintf(LM->msg, "Create persistent sample, %s, with %s rows.  Retcode=%d.",
                              table.data(), intStr, retcode);
             LM->Log(LM->msg);
           }
       }
       if (hs_globals->optFlags & REMOVE_SAMPLE_OPT)  /* remove sample requested*/
       {
         float allowedDiff = (float) CmpCommon::getDefaultNumeric(USTAT_SAMPLE_PERCENT_DIFF)/100;
         if (hs_globals->optFlags & SAMPLE_ALL)
           allowedDiff = 1e10; // Set to large value to find ALL samples.
         sampleList->removeMatchingSamples(hs_globals->objDef, sampleRows, allowedDiff);
         if (LM->LogNeeded())
           {
             char intStr1[30], intStr2[30];
             convertInt64ToAscii(hs_globals->objDef->getObjectUID(), intStr1);
             convertInt64ToAscii(sampleRows, intStr2);
             sprintf(LM->msg, "Remove persistent samples for UID=%s, with %s rows and diff=%f",
                               intStr1, intStr2, allowedDiff);
             LM->Log(LM->msg);
           }
       }
     }
   }
   return retcode;
 }

 NAString HSColGroupStruct::generateTextForColumnCast()
 {
   NAString textForColumnCast = ", ";
   NAString columnName = "", dblQuote = "\"";

   // This applies to MC groups only.
   // Add NVL function to mark individual columns returning NULL values correctly,
   // otherwise the entire output as NULL
   NABoolean isMCGroup = FALSE;
   NAString nvlTextFirstPart = "NVL(", nvlTextLastPart = ", 'NULL')";
   NAString replaceFuncFirstPart = "REPLACE(", replaceFuncLastPart = ", '''', '''''')";

   NABoolean firstColumn = TRUE;
   const Lng32 maxCharBoundaryLen = (Lng32) CmpCommon::getDefaultNumeric(USTAT_MAX_CHAR_BOUNDARY_LEN);

   if (colCount > 1)
     isMCGroup = TRUE;

   for (Int32 i=0; i<colCount; i++)
   {
     HSColumnStruct &col = colSet[i];
     columnName = ToAnsiIdentifier(col.colname->data());

     // Surround column name with double quotes, if not already delimited.
     if (columnName.data()[0] != '"')
       columnName=dblQuote+columnName+dblQuote;

     if(!firstColumn)
       textForColumnCast.append(" || ',' || ");

     NABoolean isACharColumn = DFS2REC::isAnyCharacter(col.datatype);

     if(isMCGroup)
     {
       if(isACharColumn)
         textForColumnCast.append(" '''' || ");
       textForColumnCast.append(nvlTextFirstPart);
     }

     //We must use TRANSLATE to convert non-unicode character strings
     //to unicode
     if (isACharColumn)
     {
       if(isMCGroup)
         textForColumnCast.append(replaceFuncFirstPart);

       NAString fromCS(CharInfo::getCharSetName(col.charset));
       HS_ASSERT(fromCS != SQLCHARSETSTRING_UNKNOWN);

       //10-040322-4394
       //We need to trim trailing blanks when the data is in its natural
       //character set form - NOT when it has been translated. Otherwise,
       //we may trim incorrect blanks.
       //
       //10-040224-3482
       //Since there is no direct translation between KANJI->UCS2 and
       //KCS5601->UCS2, we must first translate to ISO88591, then to UCS2
       switch (col.charset)
         {
           //KANJI/KCS::
           //  TRANSLATE(
           //    TRANSLATE(
           //      TRIM(TRAILING FROM
           //        SUBSTRING(<col>, 1, <#>)
           //      )
           //    USING <cs>TOISO88591)
           //  USING ISO88591TOUCS2)
           case CharInfo::KSC5601_MP:
           case CharInfo::KANJI_MP:
             {
               textForColumnCast.append("TRANSLATE(TRANSLATE(TRIM(TRAILING FROM SUBSTRING(");
               textForColumnCast.append(columnName.data());
               textForColumnCast.append(", 1, ");
               textForColumnCast.append(LongToNAString(maxCharBoundaryLen));
               textForColumnCast.append(")) USING ");
               textForColumnCast.append(fromCS.data());
               textForColumnCast.append("TOISO88591) USING ISO88591TOUCS2)");
               break;
             }

           //UNICODE:
           //    TRIM(TRAILING FROM
           //      SUBSTRING(<col>, 1, <#>)
           //    )
           case CharInfo::UNICODE:
             {
               textForColumnCast.append("TRIM(TRAILING FROM SUBSTRING(");
               textForColumnCast.append(columnName.data());
               textForColumnCast.append(", 1, ");
               textForColumnCast.append(LongToNAString(maxCharBoundaryLen));
               textForColumnCast.append(")) ");
               break;
             }

           //OTHER CHARACTER DATATYPES:
           //    TRANSLATE(
           //      TRIM(TRAILING FROM
           //        SUBSTRING(<col>, 1, #)
           //      )
           //    USING <cs>TOUCS2)
           default:
             {
               textForColumnCast.append("TRANSLATE(TRIM(TRAILING FROM SUBSTRING(");
               textForColumnCast.append(columnName.data());
               textForColumnCast.append(", 1, ");
               textForColumnCast.append(LongToNAString(maxCharBoundaryLen));
               textForColumnCast.append(")) USING ");
               textForColumnCast.append(fromCS.data());
               textForColumnCast.append("TOUCS2)");
               break;
             }
         }
       if(isMCGroup)
         textForColumnCast.append(replaceFuncLastPart);
     }
   else if (colSet[i].datatype == REC_BOOLEAN)
     {
       // CAST of boolean to VARCHAR UCS2 isn't supported in the
       // engine yet (you get error 8414 at run-time if you try it),
       // so work around this by CASTing to ISO88591 then CASTing
       // to UCS2. Once the engine supports this cast we can
       // delete this code and just use the "else" case below.
       textForColumnCast.append("TRIM(TRAILING FROM CAST (CAST (");
       textForColumnCast.append(columnName.data());
       textForColumnCast.append(" AS CHAR(10)) AS VARCHAR(");
       textForColumnCast.append(LongToNAString(maxCharBoundaryLen));
       textForColumnCast.append(") CHARACTER SET UCS2))");
     }
   else
     {
       //CAST ALL OTHER DATATYPES TO UNICODE
       //      TRIM(TRAILING FROM
       //        CAST(<col> AS VARCHAR(#) CHARACTER SET UCS2)
       //      )
       textForColumnCast.append("TRIM(TRAILING FROM CAST(");
       textForColumnCast.append(columnName.data());
       textForColumnCast.append(" AS VARCHAR(");

       // for BIGNUM, increase the cast length
       // the largest possible length is 130, for example, in
       // 0.123456.... (1 leading zero + 1 decimal point + 128 precision)
       if (DFS2REC::isBigNum(colSet[i].datatype))
         textForColumnCast.append(LongToNAString(HS_MAX_UCS_BOUNDARY_CHAR));
       else
         textForColumnCast.append(LongToNAString(maxCharBoundaryLen));
       textForColumnCast.append(") CHARACTER SET UCS2))");
     }

     if(isMCGroup)
     {
       textForColumnCast.append(nvlTextLastPart);
       if(isACharColumn)
         textForColumnCast.append(" || ''''");
     }

     if(firstColumn)
       firstColumn = FALSE;
   }
   return textForColumnCast;
 }

 void HSInMemoryTable::generateSelectList(NAString& queryText)
 {
   // Create query to get data for columns in PENDING state.
   HSColGroupStruct* group = columns_;

   NABoolean firstExpn = TRUE;
   do
     {
        if ( group->state == PENDING )
        {
           if (firstExpn)
             firstExpn = FALSE;
           else
             queryText.append(", ");
           queryText.append(group->ISSelectExpn);
        }
     }
   while (group = group->next);
 }

 void HSInMemoryTable::generateInsertSelectDQuery(
                     NAString& targetTable, NAString& smplTable,
                     NAString& queryText)
 {
   if (whereCondition_.length() == 0)
     return;

   // Produce the following string
   // insert into <smplTable>_D (
   // select * from <smplTable> where <whereCondition>
   //                         )
   //
   queryText.append("INSERT INTO ");

   queryText.append(targetTable);

   queryText.append(" (SELECT * ");

   queryText.append(" FROM ");

   queryText.append(smplTable.data());

   queryText.append(" WHERE ");
   queryText.append(whereCondition_);

   queryText.append(" )");
 }


 void HSInMemoryTable::generateSelectDQuery(NAString& smplTable, NAString& queryText)
 {
   if (whereCondition_.length() == 0)
     return;

   // Produce the following string
   // select <selList> from <smplTable> where
   //
   queryText.append("SELECT ");

   NAString selectList;
   generateSelectList(selectList);
   queryText.append(selectList);

   queryText.append(" FROM ");

   queryText.append(smplTable.data());

   queryText.append(" WHERE ");
   queryText.append(whereCondition_);
   queryText.append(" FOR READ UNCOMMITTED ACCESS");
 }


 void
 HSInMemoryTable::generateInsertSelectIQuery(NAString& targetTable,
                                             NAString& sourceTable,
                                             NAString& queryText,
                                             NABoolean hasOversizedColumns,
                                             HSTableDef * objDef,
                                             Int64 futureSampleSize,
                                             Int64 currentSampleSize,
                                             Int64 sourceTableSize)
 {
   if (whereCondition_.length() == 0)
     return;

   // Create query to get data for the desired columns.
   //
   // upsert using load into <tmpTable>
   //  (select <selList> from <sourceTable> where <whereCond> <sample>)
   //                            T
   //

   queryText.append("UPSERT USING LOAD INTO ");

   queryText.append(targetTable.data());

   queryText.append(" (SELECT ");

   // Generate the select list. Truncate any over-long char/varchar columns
   // by using SUBSTRING calls. Omit any LOB columns.
   objDef->addTruncatedSelectList(queryText);

   queryText.append(" FROM ");

   queryText.append(sourceTable.data());
   queryText.append(" WHERE ");
   queryText.append(whereCondition_);
   queryText.append(" ");

   NABoolean usePeriodic =
     (CmpCommon::getDefault(USTAT_IUS_USE_PERIODIC_SAMPLING) == DF_ON);

   // First compute the sample rate as
   //   currentSampleSize - deleteSetSize = remainingUndeleteRows
   //   futureSampleSize - remainingUndeleteRows = rowsToBeInserted
   //   new sample rate = rowsToBeInserted / sourceTableSize

   //Int64 remainingUndeleteRows = currentSampleSize - deleteSetSize;
   //Int64 rowsToBeInserted = futureSampleSize - remainingUndeleteRows;
   //Int64 newSampleRate = rowsToBeInserted / sourceTableSize;

   double newSampleRate = sampleRate_;

   if (newSampleRate > 0) {
      NAString sampleClause;
      if ( usePeriodic ) {
        //
        // periodic 1 rows in every x rows
        // Let m denote # of x-row sample set, in which 1 row will be picked.
        // m = newSampleRate * rows
        // x * m = rows
        // x = rows / m = rows / (newSampleRate * rows) = 1/ newSampleRate
        //
        Int64 sv1 = 1;
        Int64 sv2 = (Int64)ceil((double)(1 / newSampleRate));

        if ( sv1 < sv2 )
          createSampleOption(SAMPLE_PERIODIC, newSampleRate * 100.0,
                             sampleClause, sv1, sv2);
      } else {
        createSampleOption(SAMPLE_RAND_1, newSampleRate * 100.0,
                           sampleClause, 0, 0);
      }
      queryText.append(sampleClause);
   }

   queryText.append(")");
 }

 void
 HSInMemoryTable::generateSelectIQuery(NAString& smplTable,
                                       NAString& queryText)
 {
   // Create query to get data for the desired columns.
   //
   //  select <selList> from <sourceTable>
   //

   queryText.append("SELECT ");

   NAString selectList;
   generateSelectList(selectList);
   queryText.append(selectList);

   queryText.append(" FROM ");
   queryText.append(smplTable.data());

   queryText.append("_I FOR READ UNCOMMITTED ACCESS");
 }


 // used by alg1
 void
 HSInMemoryTable::generateInsertQuery(NAString& smplTable, NAString& sourceTable,
                                      NAString& queryText, NABoolean addNoRollback)
 {
   if (whereCondition_.length() == 0)
     return;

   // Create query to get data for the desired columns.
   //
   // select * from (insert into <smplTbl>
   //                 (select * from <targetTbl> where <whereCond> <sample>)
   //                           ) T
   //

   //queryText.append("SELECT * FROM (INSERT INTO ");

   if ( addNoRollback )
     queryText.append("INSERT WITH NO ROLLBACK INTO "); // for algorithm 1
   else
     queryText.append("INSERT INTO "); // for algorithm 1

   queryText.append(smplTable.data());

   queryText.append(" (SELECT * FROM ");

   queryText.append(sourceTable.data());
   queryText.append(" WHERE ");
   queryText.append(whereCondition_);
   queryText.append("  ");

   NABoolean usePeriodic =
     (CmpCommon::getDefault(USTAT_IUS_USE_PERIODIC_SAMPLING) == DF_ON);

   if (sampleRate_ > 0)
     {
       NAString sampleClause;
       if ( usePeriodic )
         {
           //
           // periodic 1 rows in every x rows
           // Let m denote # of x-row sample set, in which 1 row will be picked.
           // m = sampleRate_ * rows
           // x * m = rows
           // x = rows / m = rows / (sampleRate_ * rows) = 1/sampleRate_
           //
           Int64 sv1 = 1;
           Int64 sv2 = (Int64)ceil(1 / sampleRate_);

           if ( sv1 < sv2 )
             createSampleOption(SAMPLE_PERIODIC, sampleRate_ * 100.0,
                                 sampleClause, sv1, sv2);
         }
       else
         createSampleOption(SAMPLE_RAND_1, sampleRate_ * 100.0,
                            sampleClause, 0, 0);

       queryText.append(sampleClause);
     }

   queryText.append(")");
   //queryText.append(")) T");
 }

 Lng32 HSInMemoryTable::populate(NAString& queryText)
 {
   HS_ASSERT(!isPopulated_);

   HSLogMan *LM = HSLogMan::Instance();
   Lng32 retcode = 0;
   Int64 rowsLeft;
   HSCursor popCursor;

   // the most likely error is on a prepare due to a bad WHERE clause
   // from the UPDATE STATS command itself; e.g. a syntax error or
   // perhaps a bad column reference due to a typo
   HSErrorCatcher errorCatcher(retcode, - UERR_IUS_BAD_WHERE_CLAUSE,
                               "POPULATE_FROM_QUERY", TRUE);
   LM->Log("Preparing rowset...");
   // Allocate descriptors and statements for CLI and prepare rowset by
   // assigning location for results to be written.
   rowsLeft = rows_;

   HSFuncExecQuery("CONTROL QUERY DEFAULT ALLOW_DML_ON_NONAUDITED_TABLE 'ON'");


   // prepareRowset may do retries
   retcode = popCursor.prepareRowset(queryText.data(), FALSE, columns_,
        (Lng32)MINOF(MAX_ROWSET, rowsLeft));
   if (retcode < 0)
     {
       sprintf(LM->msg, "Error in prepareRowset for statement:\n%s", queryText.data());
       LM->Log(LM->msg);
       HSHandleError(retcode);
     }
   else
     retcode=0; // Set to 0 for warnings.
   LM->Log("...rowset prepared");

   LM->Log("fetching rowsets...");
   if (LM->LogNeeded())
     LM->StartTimer("Fetching rowsets");
   Int64 rowCount = 0;
   while (retcode >= 0          // allow warnings
          && retcode != HS_EOF  // exit if no more data
          && rowsLeft > 0)      // internal CLI error if 0 used for # rows to read
     {
       retcode = popCursor.fetchRowset();
       if (retcode == 0)  // 1 or more rows successfully read
         {
           rowCount += popCursor.rowsetSize();
           rowsLeft = rows_ - rowCount;
           retcode = HSGlobalsClass::processInternalSortNulls(popCursor.rowsetSize(), columns_);

           if ( retcode != 0 ) {

             HSFuncExecQuery("CONTROL QUERY DEFAULT ALLOW_DML_ON_NONAUDITED_TABLE reset");

             HSHandleError(retcode);
           }

           retcode = popCursor.setRowsetPointers(columns_,
                                                 (Lng32)MINOF(MAX_ROWSET, rowsLeft));
         }
     }

   HSFuncExecQuery("CONTROL QUERY DEFAULT ALLOW_DML_ON_NONAUDITED_TABLE reset");

   if (retcode < 0) HSHandleError(retcode) else retcode=0; // Set to 0 for warnings.

   if (LM->LogNeeded())
     LM->StopTimer();

   LM->Log("...done fetching rowsets");
   isPopulated_ = TRUE;
   rows_ = rowCount;  // Actual number of rows read into memory
   return retcode;
 }

 extern HSColGroupStruct*
 AddSingleColumn(const Lng32, HSColGroupStruct*&, NABoolean prepend = TRUE);

 void HSInMemoryTable::setUpColumns()
 {
   HSGlobalsClass *hs_globals = GetHSContext();
   HSColGroupStruct* group = hs_globals->singleGroup;
   HSColGroupStruct* newGroup;
   while (group != NULL)
     {
       HSColumnStruct& col = group->colSet[0];
       newGroup = AddSingleColumn(col.colnum, columns_, FALSE);
       newGroup->state = UNPROCESSED;
       group = group->next;
     }

   // For each column get the C++ type used to store it internally, determine the
   // amount required memory required for all values of the column, then allocate
   // the memory.
   //@NOTE: Should check if internal sort types have already been mapped for the
   //       globals col list, and just copy type info if so.
   mapInternalSortTypes(columns_);
   HSGlobalsClass::getMemoryRequirements(columns_, rows_);
 }

 void HSInMemoryTable::logState(const char* title)
 {
    // Create query to get data for columns in PENDING state.
   HSColGroupStruct* group = columns_;
   HSLogMan *LM = HSLogMan::Instance();

   sprintf(LM->msg, "%s, groups in PENDING state: ", title);
   LM->Log(LM->msg);
   do
     {
        if ( group->state == PENDING )
        {
          sprintf(LM->msg, "(%s, " PF64 ")", group->colSet[0].colname->data(), group->memNeeded);
          LM->Log(LM->msg);
        }
     }
   while (group = group->next);
   sprintf(LM->msg, " ");
   LM->Log(LM->msg);

 }

 //CollIndex HSGlobalsClass::selectFastStatsBatch(NAArray<HSColGroupStruct*>& colGroups)
 CollIndex HSGlobalsClass::selectFastStatsBatch(HSColGroupStruct** colGroups)
 {
   HSColGroupStruct* group = singleGroup;
   CollIndex groupCount = 0;
   while (group != NULL)
     {
       if (group->state == UNPROCESSED
           && !DFS2REC::isAnyCharacter(group->ISdatatype))  //@ZXbl -- temp restriction
         {
           //@ZXbl -- for now just return 1 column. Later, return as many as we
           //         have memory for.
           group->state = PENDING;
           colGroups[groupCount++] = group;
           break;    //@ZXbl -- for now, do 1 column at a time
         }
       group = group->next;
     }

   return groupCount;
 }

 //Lng32 HSGlobalsClass::processFastStatsBatch(CollIndex numCols, NAArray<HSColGroupStruct*> colGroups)
 Lng32 HSGlobalsClass::processFastStatsBatch(CollIndex numCols, HSColGroupStruct** colGroups)
 {
   Lng32 retcode = 0;
   HSCursor cursor;
   CollIndex i;
   HSColGroupStruct* group = NULL;
   HSLogMan *LM = HSLogMan::Instance();

   for (i=0; i<numCols; i++)
     {
       group = colGroups[i];

       //@ZXbl -- memory alloc may be moved later. Also needs to be able to recover
       //         from insufficient memory.
       if (!group->allocateISMemory(MAX_ROWSET,
                                    TRUE,        // alloc strdata if a char type
                                    FALSE))      // no recalc memneeded (IUS)
         {
           diagsArea << DgSqlCode(UERR_FASTSTATS_MEM_ALLOCATION_ERROR);
           retcode = -1;
           HSHandleError(retcode);
         }

       // setRowsetPointers() binds group->nextData to output. It is also used by
       // internal sort, which fetches all rowsets into memory before processing
       // any data.
       group->nextData = group->data;

       // This will be owned by the FastStatsHist object it is used to construct below.
       FastStatsCountingBloomFilter* cbf =
           new(STMTHEAP) FastStatsCountingBloomFilter(STMTHEAP, 5, sampleRowCount/2,
               .01, 255);

       switch (group->ISdatatype)
       {
         case REC_BIN8_SIGNED:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<Int8>(group, cbf);
           break;

         case REC_BOOLEAN:
         case REC_BIN8_UNSIGNED:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<UInt8>(group, cbf);
           break;

         case REC_BIN16_SIGNED:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<Int16>(group, cbf);
           break;

         case REC_BIN16_UNSIGNED:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<UInt16>(group, cbf);
           break;

         case REC_BIN32_SIGNED:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<Int32>(group, cbf);
           break;

         case REC_BIN32_UNSIGNED:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<UInt32>(group, cbf);
           break;

         case REC_BIN64_SIGNED:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<Int64>(group, cbf);
           break;

         case REC_BIN64_UNSIGNED:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<UInt64>(group, cbf);
           break;

         case REC_IEEE_FLOAT32:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<Float32>(group, cbf);
           break;

         case REC_IEEE_FLOAT64:
           group->fastStatsHist = new(STMTHEAP) FastStatsHist<Float64>(group, cbf);
           break;

         case REC_BYTE_F_ASCII:
         case REC_BYTE_F_DOUBLE:
           //group->fastStatsHist = new(STMTHEAP) FastStatsHist<ISFixedChar*>(group, cbf);
           LM->Log("char types not yet supported for fast-stats");
           retcode=-1;
           HSHandleError(retcode);
           break;

         case REC_BYTE_V_ASCII:
         case REC_BYTE_V_DOUBLE:
           //group->fastStatsHist = new(STMTHEAP) FastStatsHist<ISVarChar*>(group, cbf);
           LM->Log("char types not yet supported for fast-stats");
           retcode=-1;
           HSHandleError(retcode);
           break;

         default:
           sprintf(LM->msg, "processFastStatsBatch(): unknown type %d",
                            group->ISdatatype);
           LM->Log(LM->msg);
           retcode=-1;
           HSHandleError(retcode);
           break;
       }
     }

   retcode = prepareToReadColumnsIntoMem(&cursor, MAX_ROWSET);
   while (retcode >= 0          // allow warnings
          && retcode != HS_EOF) // exit if no more data
     {
       retcode = cursor.fetchRowset();
       if (retcode == 0)  // 1 or more rows successfully read
         {
           for (i=0; i<numCols; i++)
             {
               colGroups[i]->fastStatsHist->addRowset(cursor.rowsetSize());
             }
         }
     }

   cursor.close();

   // All the data is now represented in CBFs, so the buffers used to read the
   // data into can be freed.
   for (i=0; i<numCols; i++)
     {
       colGroups[i]->freeISMemory();
     }

   // Finish processing the histogram for each column and mark it as completed.
   for (i=0; i<numCols; i++)
     {
       group = colGroups[i];
       group->fastStatsHist->actuate(intCount);
       group->state = PROCESSED;
       delete group->fastStatsHist;
       group->fastStatsHist = NULL;
     }

   return retcode;
 }

 Lng32 HSGlobalsClass::CollectStatisticsWithFastStats()
 {
   Lng32 retcode = 0;

   mapInternalSortTypes(singleGroup, TRUE);
   getMemoryRequirements(singleGroup, MAX_ROWSET);

   //NAArray<HSColGroupStruct*> colGroups(20); //singleGroupCount);
   HSColGroupStruct** colGroups;
   colGroups = new(STMTHEAP) HSColGroupStruct*[singleGroupCount];

   CollIndex numCols;
   do
   {
     numCols = selectFastStatsBatch(colGroups);
     if (numCols > 0)
       retcode = processFastStatsBatch(numCols, colGroups);
   } while (numCols > 0 && retcode >= 0);

   return retcode;
 }