// core/sql/ustat/hs_globals.h - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 #ifndef HSGLOBALS_H
 #define HSGLOBALS_H
 /* -*-C++-*-
  *****************************************************************************
  *
  * File:         hs_globals.h
  * Description:  Global structures.
  * Created:      03/25/96
  * Language:     C++
  *
  *
  *
  *
  *****************************************************************************
  */

 #include "hs_const.h"
 #include "hs_cont.h"
 #include "hs_cli.h"
 #include "hs_la.h"
 #include "BloomFilter.h"
 #include "nawstring.h"
 #include "Collections.h"
 #include "ComVersionDefs.h"
 #include "ComSmallDefs.h"
 #include "NABitVector.h"
 #include <exp_function.h>

 // -----------------------------------------------------------------------
 // Externals.
 // -----------------------------------------------------------------------
 class ComDiagsArea;

 // Scratch buffers defined elsewhere. The character comparison code in this
 // header passes sortBuffer1 and sortBuffer2 to Collated_cmp() whenever a
 // column uses a system collation.
 // NOTE(review): lengthOfSortBufrs presumably holds the allocated size of the
 // two buffers -- confirm against the file that defines them.
 extern THREAD_P Int32       lengthOfSortBufrs ;
 extern THREAD_P char *    sortBuffer1 ;
 extern THREAD_P char *    sortBuffer2 ;

 // Hash dictionary keyed by NAString with double values.
 // NOTE(review): the name suggests it is used for just-in-time logging --
 // confirm against its users.
 typedef NAHashDictionary<NAString, double> JitLogHashType;

 // -----------------------------------------------------------------------
 // Forward.
 // -----------------------------------------------------------------------
 struct HSColumnStruct;
 struct HSColGroupStruct;
 struct HSColDesc;
 class HSGlobalsClass;
 class HSInterval;
 class HSHistogram;
 class HSInMemoryTable;
 class AbstractFastStatsHist;

 // Free functions declared here and defined elsewhere; their bodies are not
 // visible in this header, so the notes below are limited to what the
 // signatures show.

 // NOTE(review): both return an Lng32, presumably a status/return code --
 // confirm against the definitions.
 Lng32 AddNecessaryColumns();
 Lng32 AddAllColumnsForIUS();

 // sampleOpt is taken by non-const reference, so it is presumably the output
 // of this function -- TODO confirm. The two sampleValue arguments are
 // optional and default to 0.
 void createSampleOption(Lng32 sampleType, double samplePercent, NAString &sampleOpt,
                         Int64 sampleValue1=0, Int64 sampleValue2=0);
 // dbf is taken by non-const reference; presumably it receives a
 // representation of dbl -- TODO confirm.
 Lng32 doubleToHSDataBuffer(const double dbl, HSDataBuffer& dbf);
 Lng32 managePersistentSamples();

 // Function template parameterized on the value type T. 'boundary' is taken
 // by non-const reference while 'group' is read-only.
 // NOTE(review): presumably stores 'value' into 'boundary' using attributes of
 // 'group' -- confirm against the definition.
 template <class T>
 Lng32 setBufferValue(T& value,
                       const HSColGroupStruct *group,
                       HSDataBuffer &boundary);


 // An instance of ISFixedChar represents a value of a fixed-length character
 // string (either single or double-byte) retrieved into memory for use by
 // internal sort. A pointer to the actual string is maintained, and definitions
 // are provided for all operators used by the template functions that implement
 // the internal sort processing.
 //
 // The static member variable 'length' is used to store the fixed length of the
 // referenced strings, avoiding the need to store it separately for each
 // instance. However, the class is used for strings of any length, so before
 // processing a char(n) column for internal sort, setLength(n) must be called.
 // The length is in bytes, not characters.
 //
 // memcmp is used in the implementation of equality operators, because this
 // will work for both single and double-byte strings. For UCS2 comparison we
 // use na_wcsnncmp(), but for regular character columns we use memcmp.
 //
 // When a char column is sorted, the actual strings remain in place in the
 // buffer they are originally read into. An array of ISFixedChar objects that
 // reference those strings are exchanged instead to perform the sort.
 //
 class ISFixedChar
 {
   public:
     // A default-constructed instance references no string.
     ISFixedChar()
       : content(NULL)
     {}

     // Copy ctor used by placeWidePivot() to create an instance that is the
     // pivot value. A reference to an existing array element can't be used
     // for that, because the element is overwritten as elements are moved
     // around during the sort. Only the pointer is copied, not the string.
     ISFixedChar(const ISFixedChar& other)
       : content(other.content)
     {}

     // Accessors for the per-column attributes shared by all instances.
     // The length is in bytes.
     static void  setLength(Int32 len)                           { length = len; }
     static Int32 getLength()                                    { return length; }
     static void  setCaseInsensitive(NABoolean ci)               { caseInsensitive = ci; }
     static void  setColCollation(CharInfo::Collation Collation) { colCollation = Collation; }
     static void  setCharSet(CharInfo::CharSet CharSet)          { charset = CharSet; }

     // Accessors for the referenced string; the string itself is never copied.
     char* getContent()          { return content; }
     void  setContent(char* ptr) { content = ptr; }

     // Have to define new[] and delete[] here if we want to use NAHeap, because
     // NABasicObject does not define array versions of those operators. Even if
     // it did, we wouldn't subclass it because it would make the objects bigger
     // (NABasicObject has a heap ptr member variable).
     // The definitions below are currently commented out.
     /*
     void* operator new[](size_t size)
     {
       return STMTHEAP->allocateMemory(size, FALSE);
     }

     void operator delete[](void *addr)
     {
       STMTHEAP->deallocateMemory(addr);
     }
 */

     // The assignment operators return void rather than the conventional
     // reference to the assigned-to object. This keeps the operation as cheap
     // as possible, since it is performed many times.
     void operator=(const ISFixedChar &rhs) { content = rhs.content; }
     void operator=(char* ptr)              { content = ptr; }

     // Compare this object to rhs, returning negative value if less, 0 if equal,
     // and positive value if greater.
     Int32 compare(const ISFixedChar &rhs);

     // Equality test. Returns nonzero when the two referenced strings are
     // equal over 'length' bytes under the current column attributes.
     Int32 operator==(const ISFixedChar &rhs)
     {
       // Case insensitivity is not supported with a non-binary collation, so
       // the collated comparison takes precedence.
       if (CollationInfo::isSystemCollation(colCollation))
         {
           return (Collated_cmp(content, rhs.content, length, colCollation,
                                sortBuffer1, sortBuffer2 ) == 0);
         }

       // UCS2 cols are not supported in MODE_SPECIAL_1 or 2 and do not support
       // case insensitivity.
       if (caseInsensitive)
         return !hs_strncasecmp(content, rhs.content, length);

       // memcmp() is sufficient here because only equality is being tested.
       return !memcmp(content, rhs.content, length);
     }

     Int32 operator!=(const ISFixedChar &rhs) { return !(*this == rhs); }

     // The ordering operators are all expressed in terms of compare().
     Int32 operator<(const ISFixedChar &rhs)  { return compare(rhs) < 0; }
     Int32 operator<=(const ISFixedChar &rhs) { return compare(rhs) <= 0; }
     Int32 operator>(const ISFixedChar &rhs)  { return compare(rhs) > 0; }
     Int32 operator>=(const ISFixedChar &rhs) { return compare(rhs) >= 0; }

     // These operators must be defined to allow this type to be used with
     // existing templates. Each one calls fail() before returning a dummy
     // value.
     operator Int64()
     {
       fail("Int64()", __LINE__);
       return 0;
     }
     operator Int32()
     {
       fail("int()", __LINE__);
       return 0;
     }
     Int32 operator/(Int32 i)
     {
       fail("/", __LINE__);
       return 0;
     }
     Int32 operator%(Int32 i)
     {
       fail("%", __LINE__);
       return 0;
     }
     Int32 operator>=(Int32 i)
     {
       fail(">=", __LINE__);
       return 0;
     }
     Int32 operator<(Int32 i)
     {
       fail("<", __LINE__);
       return 0;
     }
     ISFixedChar& operator-()
     {
       fail("-", __LINE__);
       return *this;
     }

   protected:
     // Give internal error if undefined operator invoked.
     void fail(const char* opName, Lng32 line);

     // To keep ISFixedChar as lightweight as possible, the length, case
     // sensitivity, collation and character set are static members instead of
     // being repeated in every instance. They must be set before each char
     // column is processed.
     static THREAD_P Int32 length;
     static THREAD_P NABoolean caseInsensitive;
     static THREAD_P CharInfo::Collation colCollation;
     static THREAD_P CharInfo::CharSet charset;

     // Points to a fixed-length string whose length is the current value of
     // the static 'length' member.
     char* content;
 };


 // This class extends ISFixedChar and is used to represent values of a fixed-length
 // character string being processed for IUS. It is used in two ways:
 //   1) In the arrays of interval boundary values and MFV values used in
 //      processIUSColumns. When used for this purpose, the 'content' (inherited)
 //      member variable has a single, fixed value, which the instance owns and
 //      takes responsibility for deleting.
 //   2) To serially represent the values of a column in an in-memory table. In
 //      this case, a single instance of the class is used in conjunction with
 //      the IUSValueIterator class and assumes each in-memory value of the
 //      column in turn as the next() member function is called to move the
 //      content ptr to the next value. These values assumed by the ptr are
 //      addresses within the strData buffer of HSColGroupStruct, and so are
 //      not owned by this class.
 class IUSFixedChar : public ISFixedChar
 {
   public:
     // ownsContent tells the destructor whether the referenced string is
     // released together with this object; it defaults to TRUE.
     IUSFixedChar(NABoolean ownsContent = TRUE)
       : ownsContent_(ownsContent)
     {}

     // Unlike ISFixedChar, this subclass may have allocated and may own its
     // content, in which case it is released here.
     virtual ~IUSFixedChar()
     {
       if (ownsContent_)
         {
           NADELETEBASIC(content, STMTHEAP);
         }
     }

     // Assignment from an HSDataBuffer is how the object is initialized to an
     // interval boundary or MFV value.
     void operator=(const HSDataBuffer& buff);

     // Advance the content ptr to the start of the next value. The step is
     // 'length', multiplied by sizeof(NAWchar) when the charset is UNICODE.
     void next()
       {
         const size_t unitSize = (charset == CharInfo::UNICODE) ? sizeof(NAWchar) : 1;
         content += (length * unitSize);
       }

   private:
     // If TRUE, content must be deleted when this object goes away.
     NABoolean ownsContent_;
 };


 // This class performs the same function as ISFixedChar, except for varying
 // length character strings. See the documentation of ISFixedChar for more
 // information. The essential difference is that an ISVarChar object points
 // to storage that includes the actual length of the string in the first two
 // bytes, followed immediately by the string itself.
 class ISVarChar
 {
   public:
     ISVarChar()
       : content(NULL)
     {}

     char* getContent()
     {
       return content;
     }

     short getLength()
     {
       return *(Int16*)content;
     }

     void setContent(char* ptr)
     {
       content = ptr;
     }

     void static setDeclaredLength(Int32 len)
     {
       declaredLength = len;
     }

     void static setCaseInsensitive(NABoolean ci)
     {
       caseInsensitive = ci;
     }

     static void setColCollation(CharInfo::Collation Collation)
     {
       colCollation = Collation;
     }

     static void setCharSet(CharInfo::CharSet CharSet)
     {
       charset = CharSet;
     }

     // Have to define new[] and delete[] here if we want to use NAHeap. Even if
     // NABasicObject defined the array forms of these operators, we wouldn't
     // subclass it because it would make the objects bigger (NABasicObject has
     // a heap ptr member variable).
     void* operator new[](size_t size)
     {
       return STMTHEAP->allocateMemory(size, FALSE);
     }

     void operator delete[](void *addr)
     {
       STMTHEAP->deallocateMemory(addr);
     }

     // Note that we forego the usual convention of having operator= return a
     // reference to the assigned-to object. This is just to make this operation
     // as efficient as possible, since it will be performed many times.
     void operator=(const ISVarChar &rhs)
     {
       content = rhs.content;
     }

     void operator=(char* ptr)
     {
       content = ptr;
     }

     // Compare this object to rhs, returning negative value if less, 0 if equal,
     // and positive value if greater.
     Int32 compare(const ISVarChar &rhs);

     Int32 operator==(const ISVarChar &rhs);

     Int32 operator!=(const ISVarChar &rhs)
     {
       return !(*this == rhs);
     }

     Int32 operator<(const ISVarChar &rhs)
     {
       return compare(rhs) < 0;
     }

     Int32 operator>(const ISVarChar &rhs)
     {
       return compare(rhs) > 0;
     }

     Int32 operator<=(const ISVarChar &rhs)
     {
       return compare(rhs) <= 0;
     }

     Int32 operator>=(const ISVarChar &rhs)
     {
       return compare(rhs) >= 0;
     }

     // These operators must be defined to allow this type to be used with
     // existing templates.
     operator Int64() { fail("Int64()", __LINE__); return 0; };
     operator Int32() { fail("int()", __LINE__); return 0; };
     Int32 operator/(Int32 i) { fail("/", __LINE__); return 0; };
     Int32 operator*(Int32 i) { fail("*", __LINE__); return 0; };
     Int32 operator%(Int32 i) { fail("%", __LINE__); return 0; };
     Int32 operator>=(Int32 i) { fail(">=", __LINE__); return 0; };
     Int32 operator<(Int32 i) { fail("<", __LINE__); return 0; };
     ISVarChar& operator-() { fail("-", __LINE__); return *this; };

   protected:
     // Give internal error if undefined operator invoked.
     void fail(const char* opName, Lng32 line);

     // To make ISVarChar as lightweight as possible, we use a static members to
     // hold column attributes, rather than repeating them for each instance.
     // They must be set before each char column is processed.
     static THREAD_P Int32 declaredLength;
     static THREAD_P NABoolean caseInsensitive;
     static THREAD_P CharInfo::Collation colCollation;
     static THREAD_P CharInfo::CharSet charset;

     // The content pointed to by objects of this class consists of a 2-byte
     // field giving the length in bytes, immediately followed by a string
     // represented by that number of bytes.
     char* content;
 };


 // IUSVarChar extends ISVarChar in much the same way and for the same purposes
 // that IUSFixedChar does for ISFixedChar. The implementations of next() and
 // the assignment from HSDataBuffer differ due to the presence of a length
 // indicator for varchars.
 class IUSVarChar : public ISVarChar
 {
   public:
     // ownsContent tells the destructor whether the referenced storage is
     // released together with this object; it defaults to TRUE.
     IUSVarChar(NABoolean ownsContent = TRUE)
       : ownsContent_(ownsContent)
     {}

     // Unlike ISVarChar, this subclass may have allocated and may own its
     // content, in which case it is released here.
     virtual ~IUSVarChar()
     {
       if (ownsContent_)
         {
           NADELETEBASIC(content, STMTHEAP);
         }
     }

     // Assignment from an HSDataBuffer; defined out of line.
     void operator=(const HSDataBuffer& buff);

     // Advance the content ptr to the start of the next value.
     void next()
       {
         // strData contains the declared (not just the actual) number of
         // chars, so step over the length field plus the full declared size.
         const size_t unitSize = (charset == CharInfo::UNICODE) ? sizeof(NAWchar) : 1;
         content += (sizeof(Int16) + (declaredLength * unitSize));

         // Each piece of varchar data (including the data length) is placed
         // at an even-address boundary. See the switch statement case for
         // VARCHAR in method HSGlobalsClass::processInternalSortNulls().
         // The same rule is applied here: an odd address is bumped by one.
         if (ULong(content) & 1)
           ++content;
       }

   private:
     // If TRUE, content must be deleted when this object goes away.
     NABoolean ownsContent_;
 };


 //---------------------------------- FOR MC CHANGES --------------------------------------------------

 // for MC
 //  The iterator classes are used by the MCWrapper objects to help compare columns of a given type.
 //
 //  The MCIterator class hierarchy is as follows:
 //
 //                                                    MCIterator
 //                                                        |
 //                                                        |
 //                             -------------------------------------------------------
 //                            |                           |                           |
 //                            |                           |                           |
 //                    MCFixedCharIterator          MCNonCharIterator            MCVarCharIterator
 //
 //
 class MCIterator
 {
 public:
    MCIterator() : nullInd(NULL) {};
    virtual ~MCIterator() {}

    // compares two values in athe array of data maintained by this
    // iterator. "left" and "right" and indices into this array.
    // The method returns a negative value if less, 0 if equal,
    // and positive value if greater.
    virtual Int32 compare (Int32 left, Int32 right) = 0;
    virtual void print (ofstream& fout, Int32 index) = 0;

    NABoolean isNull(Int32 index)
    {
       return (nullInd && nullInd->testBit(index));
    }

    void dumpBits (const char* f_name, Int32 nRows)
    {
        ofstream fileout(f_name, ios::app);

        fileout << "================ Printing bit Set ================\n";

        if (!nullInd)
           fileout << "no nullable";
        else
        {
             for (Int32 i =0; i < nRows; i++)
             {
                if (nullInd->testBit(i))
                  fileout << "NULL\n";
                else
                  fileout << "NOT NULL\n";
             }
        }

        fileout << "================ Printing bit Set ================\n";
    }

    Lng32      ISdatatype;
    // bitmap of null (data) columns
    NABitVector* nullInd;
 };


 // template class for all non-character datatypes iterators
 template <class T>
 class MCNonCharIterator : public MCIterator
 {
   public:
     // ptr is the start of the array of T values this iterator walks over.
     MCNonCharIterator(T* ptr)
       : vp(ptr)
     {}

     virtual ~MCNonCharIterator()
     {}

     // Address of the value at position 'index'.
     T* getContent(Int32 index)
     {
        return (vp + index);
     }

     // Three-way comparison of the values at positions 'left' and 'right'.
     // Returns 0 if equal, 1 if the left value is greater and -1 otherwise.
     // A null sorts higher than any non-null value; two nulls compare equal.
     Int32 compare (Int32 left, Int32 right)
     {
        if (this->nullInd)
        {
           const NABoolean lhsIsNull = (this->nullInd->testBit(left));
           const NABoolean rhsIsNull = (this->nullInd->testBit(right));

           if (lhsIsNull && rhsIsNull)
             return 0;
           if (lhsIsNull)
             return 1;
           if (rhsIsNull)
             return -1;
        }

        T& lhs = vp[left];
        T& rhs = vp[right];

        if (lhs == rhs)
          return 0;

        return (lhs > rhs) ? 1 : -1;
     }

     // Writes the value at 'index' to fout, or the text NULL when the null
     // bitmap marks that row as null.
     void print (ofstream& fout, Int32 index)
     {
        if (this->nullInd && this->nullInd->testBit(index))
        {
           fout << "NULL";
           return;
        }

        fout << vp[index];
     }

     // Start of the array of values.
     T* vp;
 };

 // fixed charater type iterator
 class MCFixedCharIterator : public MCIterator
 {
   public:
     MCFixedCharIterator(char* ptr, Int32 newLength)
       : vp(ptr)
     {
       length=newLength;
     }

     virtual ~MCFixedCharIterator()
     {}

     void copyToISFixChar(ISFixedChar& fixChar, Int32 index)
     {
        fixChar.setLength(length);
        fixChar.setContent(vp + (index*length));
     }

     Int32 compare (Int32 leftIndex, Int32 rhIndex)
     {
       if (nullInd)
       {
          NABoolean leftNull  = (nullInd->testBit(leftIndex));
          NABoolean rightNull = (nullInd->testBit(rhIndex));

          if (leftNull || rightNull)
          {
             if (leftNull && rightNull)
               return 0;
             else if (leftNull) // null sorts higher than non-null
               return 1;
             return -1;
          }
       }

       char* vp1 = vp + (leftIndex*length);
       char* vp2 = vp + (rhIndex*length);

       // Note that case insensitive is not supported with non-binary collation.
       if (CollationInfo::isSystemCollation(colCollation))
          return Collated_cmp(vp1, vp2, length, colCollation, sortBuffer1, sortBuffer2);

       // UCS2 cols not supported in MODE_SPECIAL_1 or 2 and do not support case insensitivity.
       if (!caseInsensitive)
       {
         if (charset != CharInfo::UNICODE)
           return memcmp(vp1, vp2, length);
         else
           return na_wcsnncmp((const wchar_t *)vp1, length/sizeof(NAWchar),
                              (const wchar_t *)vp2, length/sizeof(NAWchar));
       }
       else
         return hs_strncasecmp(vp1, vp2, length);
     }

     void print (ofstream& fout, Int32 index)
     {
       char* vp2 = vp + (index*length);

       if (nullInd && nullInd->testBit(index))
          fout << "NULL";
       else
       {
         char *temp = new char[length+1];
         strncpy (temp, vp2, length);
         temp[length] = '\0';
         fout << temp;
         delete temp;
       }
     }

     // These must be set before each char column is processed.
     NABoolean caseInsensitive;
     CharInfo::Collation colCollation;
     CharInfo::CharSet charset;

   protected:
     char* vp;
     Int32 length;
 };

 // Variable-length character type iterator.
 //
 // Treats vp as an array of rows that are each rowLength bytes apart; the row
 // at position i starts at vp + i*rowLength. Each row begins with a length
 // field that is read as a short, followed by the string. The public
 // attributes rowLength, caseInsensitive, colCollation and charset are not
 // set by the pointer constructor and must be assigned before use.
 class MCVarCharIterator : public MCIterator
 {
   public:
     // ptr is the start of the data.
     MCVarCharIterator(char* ptr)
       : vp(ptr)
     {
     }

     // Copies only the data pointer and the row length.
     // NOTE(review): caseInsensitive, colCollation, charset and the base class
     // members (nullInd, ISdatatype) are not copied; nullInd starts out NULL
     // through the base class constructor. Confirm that callers set these on
     // the copy before using it.
     MCVarCharIterator(MCVarCharIterator& rh)
     {
       vp = rh.vp;
       rowLength = rh.rowLength;
     }

     // Makes varChar reference the row at 'index'.
     void copyToISVarChar (ISVarChar& varChar, Int32 index)
     {
        varChar.setContent(vp + (index*rowLength));
     }

     virtual ~MCVarCharIterator()
     {}

     // Three-way comparison of the values at leftIndex and rhIndex. Returns a
     // negative value if less, 0 if equal, and a positive value if greater.
     // A null sorts higher than any non-null value; two nulls compare equal.
     Int32 compare (Int32 leftIndex, Int32 rhIndex)
     {
       if (nullInd)
       {
          NABoolean leftNull  = (nullInd->testBit(leftIndex));
          NABoolean rightNull = (nullInd->testBit(rhIndex));

          if (leftNull || rightNull)
          {
             if (leftNull && rightNull)
               return 0;
             else if (leftNull) // null sorts higher than non-null
               return 1;
             return -1;
          }
       }

       char* vp1 = vp + (leftIndex*rowLength);
       char* vp2 = vp + (rhIndex*rowLength);

       // Actual lengths, taken from the length field at the start of each row.
       short len1 = *(short *) vp1;
       short len2 = *(short *) vp2;

       // NOTE(review): the byte-wise comparisons below use the longer of the
       // two lengths for both operands; presumably the shorter value is padded
       // within its row -- confirm against the code that fills the buffer.

       // Note that case insensitive is not supported with non-binary collation.
       if (CollationInfo::isSystemCollation(colCollation))
          return Collated_cmp(vp1+VARCHAR_LEN_FIELD_IN_BYTES,
                              vp2+VARCHAR_LEN_FIELD_IN_BYTES,
                              MAXOF(len1, len2),
                              colCollation, sortBuffer1, sortBuffer2);

       // UCS2 cols not supported in MODE_SPECIAL_1 or 2 and do not support case insensitivity.
       if (!caseInsensitive)
       {
         if (charset != CharInfo::UNICODE)
           return memcmp(vp1+VARCHAR_LEN_FIELD_IN_BYTES,
                         vp2+VARCHAR_LEN_FIELD_IN_BYTES,
                         MAXOF(len1, len2));
         else
           // The lengths are in bytes; na_wcsnncmp() is given counts of NAWchars.
           return na_wcsnncmp((const wchar_t*)(vp1+VARCHAR_LEN_FIELD_IN_BYTES), len1/sizeof(NAWchar),
                              (const wchar_t*)(vp2+VARCHAR_LEN_FIELD_IN_BYTES), len2/sizeof(NAWchar));
       }
       else
         return hs_strncasecmp(vp1+VARCHAR_LEN_FIELD_IN_BYTES,
                               vp2+VARCHAR_LEN_FIELD_IN_BYTES,
                               MAXOF(len1, len2));
     }

     // Writes the value at 'index' to fout, or the text NULL when the null
     // bitmap marks that row as null.
     void print (ofstream& fout, Int32 index)
     {
       char* vp2 = vp + (index*rowLength);

       if (nullInd && nullInd->testBit(index))
          fout << "NULL";
       else
       {
         // The stored value is not NUL-terminated, so copy it into a
         // temporary buffer with a terminator before streaming it.
         short strLen = *(short *) vp2;
         char *temp = new char[strLen+1];
         strncpy (temp, vp2+sizeof(short), strLen);
         temp[strLen] = '\0';
         fout << temp;
         // Memory from new[] must be released with delete[].
         delete [] temp;
       }
     }

     // Distance in bytes between the starts of consecutive rows.
     Int32 rowLength;

     // These must be set before each char column is processed.
     NABoolean caseInsensitive;
     CharInfo::Collation colCollation;
     CharInfo::CharSet charset;

   protected:
     // Start of the array of rows.
     char* vp;
 };

 // MCWrapper class is used to encapsulte MC rows
 //
 // Each MCWrapper object represents a row of the MC. The MCWrapper class has static iterators to encapsulte columns.
 // These iterators are used by the internal sort to compare rows.
 //
 // Example: let's assume our data consists of 3 rows with each row has 2 columns. Column 1 of type Int32 and Column 2
 //          of type Int64. Column 2 is a nullable column. Column1 and Column 2 iterators point to where the actual data
 //          is.
 //
 //             MCWrapper objects                     Iterator objects
 //   row1: index=0, cols  ----|                         |- col1: MCNonCharIterator<Int32> --> Int32* vp --> 1,12,3
 //   row2: index=1, cols  ----| ------------------------|- col2: MCNonCharIterator<Int64> --> Int64* vp --> 10,4,NULL
 //   row3: index=2, cols  ----|
 //
 //
 //
 class MCWrapper
 {
   public:
     MCWrapper()
       : index_ (0)
     {}

     void setIndex (Int32 newIndex)
     {
         index_ = newIndex;
     }

     // Have to define new[] and delete[] here if we want to use NAHeap, because
     // NABasicObject does not define array versions of those operators. Even if
     // it did, we wouldn't subclass it because it would make the objects bigger
     // (NABasicObject has a heap ptr member variable).
     MCWrapper(const MCWrapper &other)
     {
        index_ = other.index_;
     }

     // Have to define new[] and delete[] here if we want to use NAHeap, because
     // NABasicObject does not define array versions of those operators. Even if
     // it did, we wouldn't subclass it because it would make the objects bigger
     // (NABasicObject has a heap ptr member variable).
     void* operator new[](size_t size)
     {
       return STMTHEAP->allocateMemory(size, FALSE);
     }

     void operator delete[](void *addr)
     {
       STMTHEAP->deallocateMemory(addr);
     }

     // Note that we forego the usual convention of having operator= return a
     // reference to the assigned-to object. This is just to make this operation
     // as efficient as possible, since it will be performed many times.
     void operator=(const MCWrapper& rh)
     {
        index_ = rh.index_;
     }

     Int32 operator==(const MCWrapper& rh)
     {
        Int32 i = 0;

        //if (index_ == rh.index_)
           //return (TRUE);

        while ((i < numOfCols_) && (cols_[i]->compare(index_, rh.index_) == 0))
        {
           i++;
        }
        return (i == numOfCols_);
     }

     Int32 operator!=(const MCWrapper& rh)
     {
        return !(*this == rh);
     }

     Int32 operator<(const MCWrapper& rh)
     {
        Int32 i = 0;
        Int32 result  = 0;

        while ((i < numOfCols_) && ((result = (cols_[i]->compare(index_, rh.index_))) == 0))
        {
           i++;
        }

        return (result < 0);
     }

     // are all MC columns nullable?
     static NABoolean areAllMCColsNullable()
     {
        Int32 i = 0;
        NABoolean allNullable = TRUE;

        while ((i < numOfCols_) && allNullable)
        {
           if (!cols_[i++]->nullInd)
             allNullable = FALSE;
        }

        return allNullable;
     }

     // are all MC columns' values null?
     static NABoolean areAllMCColsNull(Int32 rowIndex)
     {
        Int32 i = 0;
        NABoolean allNulls = TRUE;

        // first check is defensive since we should not be
        // calling this method if any of the columns is
        // not nullable
        while ((i < numOfCols_) && allNulls)
        {
           if (!cols_[i]->nullInd || !(cols_[i]->nullInd->testBit(rowIndex)))
             allNulls = FALSE;
           i++;
        }

        return allNulls;
     }

     // for debugging - print all the values of a given column
     void print_column (const char* f_name, NABoolean printHeader, NABoolean printFooter, Int32 col)
     {
        if (!f_name)
          return;

        ofstream fileout(f_name, ios::app);

        if (printHeader)
           fileout << "================ Printing new MC Data Set ================\n";

        allCols_[col]->print(fileout, index_);
        fileout << "\n";

        if (printFooter)
           fileout << "================ Done Printing MC Data Set ================\n\n";
     }

     // for debugging - print all the values of all columns
     void print (const char* f_name, NABoolean printHeader, NABoolean printFooter)
     {
        if (!f_name)
          return;

        ofstream fileout(f_name, ios::app);

        if (printHeader)
           fileout << "================ Printing new MC Data Set ================\n";

        Int32 i = 0;
        while (i < numOfCols_)
        {
           cols_[i++]->print(fileout, index_);
           fileout << " ";
        }
        fileout << "\n";

        if (printFooter)
           fileout << "================ Done Printing MC Data Set ================\n\n";
     }

     static Lng32 setupMCColumnIterator (HSColGroupStruct *group, MCIterator** iter, MCIterator** iter2,
                                         Int32 &currentLoc, Int32 &notNullLoc, Int32 numRows);

     static void setupMCIterators(HSColGroupStruct *mgroup, Int32 numRows);

     // free up all memory allocated by the iterators (columns)
     void freeColsMem()
     {
        Int32 numAllCols = numOfAllCols_-1;

        while (numAllCols >= 0)
        {
          NADELETEBASIC(allCols_[numAllCols], STMTHEAP);
          numAllCols--;
        }

        NADELETEBASIC(allCols_, STMTHEAP);
        NADELETEBASIC(cols_, STMTHEAP);
        cols_ = NULL;
        allCols_ = NULL;
     }

     // all MC columns that have data (columns that are all nulls are excluded)
     static THREAD_P MCIterator** cols_;
     // all MC columns
     static THREAD_P MCIterator** allCols_;
     static THREAD_P Int32 numOfCols_;
     static THREAD_P Int32 numOfAllCols_;
     // number of null rows for this MC
     static THREAD_P Int32 nullCount_;
     // index of this MC row
     Int32 index_;

     // These operators must be defined to allow this type to be used with
     // existing templates.
     operator Int64() { fail("Int64()", __LINE__); return 0; };
     operator Int32() { fail("int()", __LINE__); return 0; };
     Int32 operator/(Int32 i) { fail("/", __LINE__); return 0; };
     Int32 operator%(Int32 i) { fail("%", __LINE__); return 0; };
     Int32 operator>=(Int32 i) { fail(">=", __LINE__); return 0; };
     Int32 operator<(Int32 i) { fail("<", __LINE__); return 0; };
     MCWrapper& operator-() { fail("-", __LINE__); return *this; };

   protected:
     // Give internal error if undefined operator invoked.
     void fail(const char* opName, Lng32 line);
 };

 //---------------------------------- END of MC IS classes --------------------------------------------------


 // -----------------------------------------------------------------------
 // Linked to HSColGroupStruct.
 // The HSColGroupStruct now has a "NASet" of HSColumnStruct rather than a linked list
 // to avoid duplicate permutations of the same set of columns.
 // This implies that the operator == needs to be defined to allow the set insertion
 // to work correctly.
 // -----------------------------------------------------------------------
 struct HSColumnStruct : public NABasicObject
   {
     // Column name.
     NAString           *colname;
     // Column name to use in SQL (e.g. with delimiters).
     NAString           *externalColumnName;
     // Column position in table.
     Lng32               colnum;
     // Position in grouplist.
     Lng32               position;
     Lng32               datatype;
     // 1 if char col is not case sensitive, else 0.
     Lng32               caseInsensitive;
     // 1 if col value can be null, else 0.
     Lng32               nullflag;
     CharInfo::CharSet   charset;
     // Column's collation enum value.
     CharInfo::Collation colCollation;
     Lng32               length;
     Lng32               precision;
     Lng32               scale;

     // Copy constructor; h defaults to STMTHEAP. Defined out of line.
     HSColumnStruct(const HSColumnStruct &src, NAMemory *h=STMTHEAP);

     // Default constructor: allocates two empty name strings on STMTHEAP and
     // gives the remaining members their "not set" defaults. The initializers
     // are listed in member declaration order, which is the order in which
     // they run.
     HSColumnStruct()
       : colname(new(STMTHEAP) NAString(STMTHEAP)),
         externalColumnName(new(STMTHEAP) NAString(STMTHEAP)),
         colnum(-1),
         position(0),
         datatype(-1),
         caseInsensitive(-1),
         nullflag(-1),
         charset(CharInfo::UnknownCharSet),
         colCollation(CharInfo::DefaultCollation),
         length(-1),
         precision(-1),
         scale(-1)
     {}

     HSColumnStruct& operator=(const HSColumnStruct& rhs);

     // Equality is required so that HSColumnStruct can be stored in an NASet.
     NABoolean operator==(const HSColumnStruct&) const;

     void addTruncatedColumnReference(NAString & qry);

     ~HSColumnStruct();
   };

 typedef NASet<HSColumnStruct>  HSColSet;           /* set of column structs   */

 // This enumerates the various states a single-column group can be in with
 // respect to internal sort.
 // NOTE: Any changes or additions to this enum must be mirrored in the
 //       SortStateName array defined in hs_globals.cpp.
 enum SortState
   {
     UNPROCESSED = 0,  // Not yet selected for any batch
     PENDING     = 1,  // Part of the batch that is currently being processed
     OVERRAN     = 2,  // Part of the current batch, but memory turned out to
                       // be insufficient (only possible with varchar
                       // compaction, when the average varchar size was
                       // underestimated)
     PROCESSED   = 3,  // Processing has completed
     DONT_TRY    = 4,  // A memory allocation failed; do not retry this one
     SKIP        = 5,  // Passed over for now
     NO_STATS    = 6   // IUS processing found no stats
   };


 //  Used by MC in-memory logic. Every MC has a  weight to group MCs
 //  in group sets that can be processed together
 //
 class MCWeight
 {
 public:
    // A freshly constructed weight is the null weight (all components 0).
    MCWeight () : u(0), v(0), w(0) {}

    // Non-zero if all three components are equal.
    Int32 operator== (const MCWeight &rhs) const
    {
      return ((u == rhs.u) && (v == rhs.v) && (w == rhs.w));
    }

    // Ordering: ascending on u, then DESCENDING on v, then ascending on w.
    // Each component is only consulted when all earlier ones are equal.
    Int32 operator< (const MCWeight &rhs) const
    {
      if (u != rhs.u)
        return (u < rhs.u);
      if (v != rhs.v)
        return (v > rhs.v);
      return (w < rhs.w);
    }

    Int32 operator<= (const MCWeight &rhs) const
    {
      return ((*this < rhs) || (*this == rhs));
    }

    // Reset to the null weight.
    void clear ()
    {
       u = v = w = 0;
    }

    // TRUE if all three components are 0.
    NABoolean isNull() const
    {
       return ((u == 0) && (v == 0) && (w == 0));
    }

    // number of other MCs this MC has common columns with
    Int32 u;
    // number of distinct columns that are used by this MC
    // but not by other MCs this MC has common columns with
    Int32 v;
    // number of columns that are only used by this MC
    Int32 w;
 };

 // -----------------------------------------------------------------------
 // Linked to HSGlobalsClass.
 // -----------------------------------------------------------------------
 // Describes one column group (single- or multi-column): the set of columns
 // it contains, its old/new histogram ids and histogram, and the working
 // storage used while the group is processed by internal sort (IS), by
 // multi-column internal sort (the mcis_ members) and by IUS.
 struct HSColGroupStruct : public NABasicObject
   {
     HSColSet          colSet;                      /* set of column structs   */
     Lng32              colCount;                    /* #columns in group       */
     NAString         *clistr;                      /* general query statement */
     ULng32     oldHistid;                   /* old histogram_id        */
     ULng32     newHistid;                   /* new histogram_id        */
     NAString          oldHistidList;               /* a list of old hist ids, in case of duplicates */
     NAString         *colNames;                    /* list of columns in group*/
     HSHistogram      *groupHist;                   /* histogram for group     */
     HSColGroupStruct *next;                        /* next group in list      */
     HSColGroupStruct *prev;                        /* reverse list for SHOWSTATS */
     HSColGroupStruct *mcis_next;                   /* For MC IS to point to next neighbor*/
     char              readTime[TIMESTAMP_CHAR_LEN+1]; /* read time; carry over to new hist */
     double            coeffOfVar;                  /* coefficient of variation (skew of this hist) */
     double            oldAvgVarCharSize;           /* average varchar size from previous histograms */
     Int64             rowsRead;                    /* number of rows read for IS so far */
     Int64             sumSize;                     /* sum of varchar size for IS so far */
     double            avgVarCharSize;              /* average varchar size, -1 for other types */
     char              reason;                      /* automation reason        */
     char              newReason;                   /* automation reason for updated hist */
     NABoolean         skewedValuesCollected;       /* Applies to only MC Groups */


     // These member items are used for internal sort of single-column groups.
     SortState        state;                        /* Internal sort status    */
     NABoolean        delayedRead;
     size_t           memNeeded;                    /* memory required, in bytes */
     size_t           strMemAllocated;              /* memory allocated, in bytes, for char data;
                                                       if compacted, this is just the area used
                                                       for compacted data                         */
     void             *data;                        /* Storage for column values */
     void             *nextData;                    /* Ptr to next place to store a value */
     void             *strData;                     /* Storage for char cols; data/nextdata */
     void             *strNextData;                 /*   will be ptrs to this */
     NABoolean        strDataConsecutive;           /* True if strData is as originally read */
     void             *varcharFetchBuffer;          /* Direct fetch addr for varchar values that will be compacted */
     short            *nullIndics;                  /* Storage for null indicators */
     Int64            nullCount;                    /* Number of null values   */
     NABoolean        eligibleForVarCharCompaction; /* true if OK to use compaction on internal sort */
     Lng32            ISdatatype;                   /* converted type for sorting */
     Lng32            ISlength;                     /* len of converted type */
     Lng32            ISvcLenUsed;                  /* varchar only; if compacted, is avg length which is usually < ISlength */
     Lng32            ISprecision;                  /* prec of converted type */
     Lng32            ISscale;                      /* scale of converted type */
     NAString         ISSelectExpn;                 /* select list expn to retrieve col */
     Int64            prevRowCount;                 /* rowcount from existing histogram */
     Int64            prevUEC;                      /* uec from existing histogram */
     Int64            colSecs;                      /* Time to sort/group data for column */
     CountingBloomFilter* cbf;                      /* A bloom filter for IUS */
     // Suffix used for this group's CBF file name: the name of the first
     // column in colSet.
     NAString& cbfFileNameSuffix() { return *colSet[0].colname; }

     void* boundaryValues;                          /* List of boundary values for IUS */
     void* MFVValues;                               /* List of MFV values for IUS */

     AbstractFastStatsHist* fastStatsHist;

     // These member items are used for internal sort of multi-column groups.
     NABitVector*       mcis_nullIndBitMap;           /* used by MC */
     NABitVector*       mcis_colsUsedMap;             /* used by MC: which single cols used by this MC */
     NABitVector*       mcis_colsMissingMap;          /* used by MC: which single cols not used by this MC but */
                                                     /* are used by its neighbors. Used to compute group weight */
     size_t           mcis_totalMCmemNeeded;        /* memory required, in bytes for MC structures overhead*/
     void             *mcis_data;                   /* copy of Storage for column values used by MC*/
     void             *mcis_nextData;               /* copy Ptr to next place to store a value used by MC*/
     Int32            mcis_rowsRead;                /* used for MC: total number of rows read for IS */
     Int32            mcs_usingme;                  /* used for MC: number of MCs using this single column */
     MCWeight         mcis_groupWeight;             /* used by MC: weight of the MC */
     NABoolean        mcis_groupHead;               /* used by MC: is this a group head */
     NABoolean        mcis_memFreed;                /* used by MC: is memory used by IS for this SC freed */
     NABoolean        mcis_readAsIs;                /* used for MC IS where a column is read to memory again */

     NABoolean allKeysInsertedIntoCBF;
     Int32            backwardWarningCount;          // for UERR_UNEXPECTED_BACKWARDS_DATA warnings

     #ifdef _TEST_ALLOC_FAILURE
     // Stuff used to test memory allocation failures.
     #define MAX_FILTER_COUNT 10
     static Int32 allocCount;
     Lng32 filterTargets[MAX_FILTER_COUNT];
     void initFilter();
     NABoolean allocFilter(Lng32 count);
     #endif

     // Returns TRUE if the internal-sort type is a varchar type and the
     // length in use (ISvcLenUsed) differs from the full length (ISlength);
     // returns FALSE for any non-varchar type.
     // @ZX Should we allow this to be called for non-varchar?
     NABoolean isCompacted()
     {
       if (!DFS2REC::isAnyVarChar(ISdatatype))
         return FALSE;
       // TODO: next line causes a compilation error... why?
       //HS_ASSERT(ISvcLenUsed > 0 && ISvcLenUsed <= ISlength);
       return ISlength != ISvcLenUsed;
     }

     void setISlength(Lng32 len, Lng32 maxVarCharLengthInBytes);

     // Size in bytes allocated for per varchar value in strData
     // (computed from ISvcLenUsed).
     size_t varcharContentSize()
     {
       return varcharContentSize(ISvcLenUsed);
     }

     // For a compacted varchar, size in bytes of a single value in fetch buffer
     // (prior to compaction); computed from ISlength.
     size_t inflatedVarcharContentSize()
     {
       return varcharContentSize(ISlength);
     }

     // Calculate size to allocate for strData.
     size_t strDataMemNeeded(Int64 rows);

     // Returns TRUE only for a single-column group whose column has a varchar
     // datatype, FALSE otherwise. Presumably this is the condition under which
     // the average actual varchar size is calculated for the stats collected
     // on the current run -- confirm against callers.
     NABoolean computeAvgVarCharSize() const
     {
       if ( (colCount == 1)  AND
            (DFS2REC::isAnyVarChar(colSet[0].datatype)) )
         return TRUE;
       else
         return FALSE;
     }

     void print();                                  /* DEBUG: print all groups */

     HSColGroupStruct();
     ~HSColGroupStruct();
     NABoolean allocateISMemory(Int64 rows, NABoolean allocStrData = TRUE,
                                NABoolean recalcMemNeeded = FALSE);
     void freeISMemory(NABoolean freeStrData = TRUE, NABoolean freeMCData=TRUE);
     NAString generateTextForColumnCast();

     // Returned value is the number of bytes needed to represent a single varchar
     // value of the given length. The len parameter could be the declared length
     // of a varchar column, or if varchars are being compacted, the estimated
     // average actual length, or the actual length of a specific compacted varchar.
     // To this we add the size of the length field, and a byte if necessary for the
     // proper alignment of the Int16 length field.
     static inline size_t varcharContentSize(Lng32 len)
     {
       return len                          // declared or avg estimated varchar len
            + (len % 2)                    // possible alignment byte
            + VARCHAR_LEN_FIELD_IN_BYTES;  // size of len field
     }
   };


 // This is the primary template for value iterators that assume the values of
 // an in-memory column in sequence. This is used for all columns that are
 // represented in memory by non-character types. Specializations are defined
 // further down for fixed and varying character strings.
 template <class T>
 class IUSValueIterator
 {
   public:
     // Begin iterating at the value ptr refers to.
     IUSValueIterator(T* ptr) : vp(ptr) {}

     virtual ~IUSValueIterator() {}

     // Declared here; the definition is not part of this class body.
     void init(HSColGroupStruct* group);

     // Step forward to the following value.
     void next() { ++vp; }

     // Address of the current value's in-memory representation.
     T* dataRepPtr() const { return vp; }

     // Reference to the current value.
     T& val() const { return *vp; }

     // Size in bytes of one value.
     size_t size() const { return sizeof(T); }

   private:
     T* vp;   // current position
 };


 // Specialization of iterator template for char types.
 template <>
 class IUSValueIterator <IUSFixedChar>
 {
   public:
     // Wrap the given fixed-char accessor object.
     IUSValueIterator(IUSFixedChar* ptr) : vp(ptr) {}

     virtual ~IUSValueIterator() {}

     // Point the accessor at the start of the group's character data.
     void init(HSColGroupStruct* group)
     {
       char* const charData = static_cast<char*>(group->strData);
       vp->setContent(charData);
     }

     // Advancing is delegated to the accessor object.
     void next() { vp->next(); }

     // Address of the current value's characters.
     char* dataRepPtr() const { return vp->getContent(); }

     // The accessor object itself, positioned at the current value.
     IUSFixedChar& val() const { return *vp; }

     // Length as reported by the accessor.
     size_t size() const { return vp->getLength(); }

   private:
     IUSFixedChar* vp;
 };


 // Specialization of iterator template for varchar types.
 template <>
 class IUSValueIterator <IUSVarChar>
 {
   public:
     // Wrap the given varchar accessor object.
     IUSValueIterator(IUSVarChar* ptr) : vp(ptr) {}

     virtual ~IUSValueIterator() {}

     // Point the accessor at the start of the group's character data.
     void init(HSColGroupStruct* group)
     {
       char* const charData = static_cast<char*>(group->strData);
       vp->setContent(charData);
     }

     // Advancing is delegated to the accessor object.
     void next() { vp->next(); }

     // Address of the string portion of the current value, i.e. just past
     // the leading length field.
     char* dataRepPtr() const
     {
       char* const lengthField = vp->getContent();
       return lengthField + VARCHAR_LEN_FIELD_IN_BYTES;
     }

     // The accessor object itself, positioned at the current value.
     IUSVarChar& val() const { return *vp; }

     // Actual length of the current varchar value, as reported by the accessor.
     size_t size() const { return vp->getLength(); }

   private:
     IUSVarChar* vp;
 };


 // Thrown in allocateISMemory to exit from the series of allocations if one fails.
 class ISMemAllocException
 {
   // Empty tag type: carries no data, only signals that an allocation failed.
   public:
     ISMemAllocException()
     {
     }
 };

 // -----------------------------------------------------------------------------
 // CLASS: HSGlobalsClass
 // -----------------------------------------------------------------------------
 class HSGlobalsClass : public NABasicObject
   {
     // Following function requires access to groupListFromTable().
     friend Lng32 AddExistingColumns();
 public:
     // parser errors
     enum { ERROR_NONE = 0, ERROR_SYNTAX, ERROR_SEMANTICS};

     // Set CQDs controlling min/max HBase cache size to minimize risk of
     // scanner timeout.
     NABoolean setHBaseCacheSize(double sampleRatio);

     // Set CQD HIVE_MAX_STRING_LENGTH_IN_BYTES if necessary
     NABoolean setHiveMaxStringLengthInBytes(void);

     // Reset any CQDs set above
     void resetCQDs(void);

     // Static fns for determining minimum table sizes for sampling, and for
     // using lowest sampling rate, under default sampling protocol.
     static Int64 getMinRowCountForSample();
     static Int64 getMinRowCountForLowSample();

     // Used by IUS for in-memory tables, and by internal Sort.
     static void getMemoryRequirements(HSColGroupStruct* group, Int64 rows);
     static void getMemoryRequirementsForOneGroup(HSColGroupStruct* group, Int64 rows);

     // used by internal sort for MC to compute MC memory requirements
     void getMCMemoryRequirements(HSColGroupStruct* group, Int64 rows);
     void getMemoryRequirementsForOneMCGroup(HSColGroupStruct* group, Int64 rows);

     static Int32 allocateMemoryForColumns(HSColGroupStruct* group, Int64 rows, HSColGroupStruct* mgr = NULL /* used for MC IS */);
     static Int32 allocateMemoryForIUSColumns(HSColGroupStruct* group, Int64 rows,
                                              HSColGroupStruct* delGroup, Int64 delRows,
                                              HSColGroupStruct* insGroup, Int64 insRows);

     // For internal sort or IUS, remove and count nulls for each column from the
     // rowset just read.
     static Lng32 processInternalSortNulls(Lng32 rowsRead, HSColGroupStruct* firstGroup);

     // Default name of Hive catalog, from cqd HIVE_CATALOG.
     static THREAD_P NAString* defaultHiveCatName;

     // See if catName is the name of a Hive catalog.
     static NABoolean isHiveCat(const NAString& catName)
     {
       // Matches the default Hive catalog name, if one has been set...
       const NABoolean matchesDefault =
         (defaultHiveCatName != NULL) && (catName == (*defaultHiveCatName));
       // ...or the Hive system catalog name.
       return (matchesDefault || catName == HIVE_SYSTEM_CATALOG);
     }

     // Default name of Hbase catalog, from cqd SEABASE_CATALOG.
     static THREAD_P NAString* defaultHbaseCatName;

     // See if catName is the name of an HBase catalog.
     static NABoolean isHbaseCat(const NAString& catName)
     {
       // The Trafodion system catalog counts as an HBase catalog, as does
       // any native HBase catalog.
       const NABoolean isTrafCat = (catName == TRAFODION_SYSCAT_LIT);
       return (isTrafCat || isNativeHbaseCat(catName));
     }

     static NABoolean isNativeHbaseCat(const NAString& catName)
     {
       // Matches the default HBase catalog name, if one has been set...
       const NABoolean matchesDefault =
         (defaultHbaseCatName != NULL) && (catName == (*defaultHbaseCatName));
       // ...or the HBase system catalog name.
       return (matchesDefault || (catName == HBASE_SYSTEM_CATALOG));
     }

     static NABoolean isNativeCat(const NAString& catName)
     {
       // Native means either native HBase or Hive; the Hive check is only
       // made when the native HBase check fails.
       const NABoolean nativeHbase = isNativeHbaseCat(catName);
       return (nativeHbase || isHiveCat(catName));
     }

     static NABoolean isTrafodionCatalog(const NAString& catName)
     { return (catName == TRAFODION_SYSCAT_LIT); }  // exact name match only

     static NABoolean isHBaseUMDHistogram(const NAString& tableName)
     {
       // TRUE if the name is that of either of the two HBase histogram
       // tables (histograms or histogram intervals).
       const NABoolean isHistTable = (tableName == HBASE_HIST_NAME);
       return (isHistTable || tableName == HBASE_HISTINT_NAME);
     }

     static void resetJitLogThresholdHash ()
     {
       // Only the pointer is cleared here; nothing is deallocated.
       jitLogThresholdHash = NULL;
     }

     HSGlobalsClass(ComDiagsArea &diags);
     ~HSGlobalsClass();

     // Initialize stats schema on demand
     Lng32 InitializeStatsSchema();

     //Process USTAT options
     Lng32 Initialize();

     //Checks privileges
     NABoolean isAuthorized(NABoolean isShowStats);

     //Based on USTAT options used, it may not be necessary
     // to collect statistics. This method will tell you if
     // they are needed or not.
     inline NABoolean StatsNeeded() const
     {
       return statsNeeded_;
     }

     //Determines histograms for Single-Column groups
     Lng32 CollectStatistics();

     //Determines histograms for Single-Column groups using Hive backing sample
     // and fast-stats algorithm with CBFs.
     Lng32 CollectStatisticsWithFastStats();

     // Select the next set of columns to process with faststats.
     CollIndex selectFastStatsBatch(HSColGroupStruct** colGroups);

     // Process columns marked PENDING with faststats.
     Lng32 processFastStatsBatch(CollIndex numCols, HSColGroupStruct** colGroups);

     //Update histogram tables with newly generated statistics
     Lng32 FlushStatistics(NABoolean &statsWritten);

     //Drive the gathering and printing of generated statistics
     Lng32 GetStatistics(NAString& outStr, Space& space);

     //Reverse the column list to fix the order
     HSColGroupStruct* ReverseList(HSColGroupStruct* list);

     // Make adjustments to the interval count before creating histograms
     Lng32 getAdjustedIntervalCount(HSColGroupStruct *group,
                                   Lng32 intCount,
                                   Int64 rowCount,
                                   Lng32 rowsetSize,
                                   NABoolean &singleIntervalPerUec,
                                   Lng32 &gapIntCount,
                                   Lng32 &highFreqIntCount);

     //Add specified group to the singleGroup or multiGroup list as appropriate.
     void addGroup(HSColGroupStruct *group);

     // Remove a single group.
     void removeGroup(HSColGroupStruct* groupToRemove);

     // Remove the most recently added groups.
     NABoolean removeGroups(Lng32 numGroupsToRemove,
                            HSColGroupStruct* oldSingle,
                            HSColGroupStruct* oldMulti);

     //Locate single-column group that matches colnum
     HSColGroupStruct* findGroup(const Lng32 colnum);
     HSColGroupStruct* findGroupAndPos(const Lng32 colnum, Int32 &pos);

     //Locate group that matches given group
     HSColGroupStruct* findGroup(const HSColGroupStruct *tableGroup);

     // check if all MCs have been computed and processed
     NABoolean allMCGroupsProcessed(NABoolean forIS=FALSE);

     //Return TRUE if 'entry' is a duplicate entry in 'list'.
     NABoolean findDuplicate(const HSColGroupStruct *entry,
                                   HSColGroupStruct *list);

     //Delete histograms in list from HISTOGRAMS and HISTOGRAM_INTERVALS tables.
     Lng32 removeHists(NAString &hists, char *uid, const char *operation);

     //Log the current contents of this class.
     void log(HSLogMan* LM);

     // Takes action necessary before throwing exception for an assertion failure.
     void preAssertionFailure(const char* condition, const char* fileName, Lng32 lineNum);

     // Derive a return code from the contents of the diagnostics area.
     Lng32 getRetcodeFromDiags();

     NABoolean canDoIUS()
     {
       // IUS is possible only when it is permitted AND a WHERE predicate was
       // given for it; the second check is skipped when the first fails.
       const NABoolean permitted = okToPerformIUS();
       return (permitted && wherePredicateSpecifiedForIUS());
     }

     NABoolean okToPerformIUS();
     NABoolean useIUSForHistograms();
     NABoolean wherePredicateSpecifiedForIUS();
     NAString& getWherePredicateForIUS();
     Lng32 validateIUSWhereClause();
     NABoolean getPersistentSampleTableForIUS(NAString& tableName,
                                              Int64 &requestedRows,
                                              Int64 &sampleRows,
                                              double &sampleRate,
                                              NABoolean forceToFetch = TRUE);
     Lng32 updatePersistentSampleTableForIUS(NAString& sampleTableName, double sampleRate,
                                             NAString& targetTableName);
     void generateIUSDeleteQuery(const NAString& smplTable, NAString& queryText, NABoolean transactional);
     void generateIUSSelectInsertQuery(const NAString& smplTable,
                                       const NAString& sourceTable,
                                       NAString& queryText);
     void getCBFFilePrefix(NAString& sampleTableName, NAString& filePrefix);
     void detectPersistentCBFsForIUS(NAString& sampleTableName, HSColGroupStruct *group);
     Lng32 UpdateIUSPersistentSampleTable(Int64 oldSampleSize, Int64 requestedSampleSize, Int64& newSampleSize);
     Lng32 readCBFsIntoMemForIUS(NAString& sampleTableName, HSColGroupStruct* group);
     Lng32 writeCBFstoDiskForIUS(NAString& sampleTableName, HSColGroupStruct* group);
     Lng32 deletePersistentCBFsForIUS(NAString& sampleTableName, HSColGroupStruct* group, SortState stateToDelete);

     void logDiagArea(const char* title);

     Lng32 begin_IUS_work();
     Lng32 end_IUS_work();

     // Populate the hash table used to determine when a ustat statement has run
     // too long and needs to have logging enabled.
     static void initJITLogData();

     // Get the JIT logging time threshold currently in effect.
     double getJitLogThreshold() const
       { return jitLogThreshold; }

     // Look up the source table being operated on and find its max elapsed time
     // before logging should be activated.
     void setJitLogThreshold()
       {
          // jitLogThresholdHash can be NULL (resetJitLogThresholdHash() sets
          // it so). Treat an absent hash exactly like a table with no entry:
          // the threshold becomes 0 instead of dereferencing a null pointer.
          double* thresholdPtr = NULL;
          if (jitLogThresholdHash != NULL)
            thresholdPtr = jitLogThresholdHash->getFirstValue(user_table);

          jitLogThreshold = (thresholdPtr ? *thresholdPtr : 0);
       }

     // Get the overall start time for the current ustat statement (in seconds
     // since epoch).
     Int64 getStmtStartTime() const
       { return stmtStartTime; }

     // Set the overall start time for the current ustat statement (in seconds
     // since epoch). At certain points this will be compared to the current
     // time to see how long the statement has been executing.
     void setStmtStartTime(Int64 time)
       { stmtStartTime = time; }

     // Compare the elapsed time so far for the ustat statement, and activate
     // logging if it exceeds the threshold currently in effect. If no threshold
     // has been established for the source table, stmtStartTime will be 0 and
     // logging will not be activated regardless of how long we've been running.
     void checkTime(const char* checkPointName)
       {
         if (!jitLogOn &&
             stmtStartTime > 0 &&
             hs_getEpochTime() - stmtStartTime > jitLogThreshold)
           {
             startJitLogging(checkPointName, hs_getEpochTime() - stmtStartTime);
           }
       }

     // Dynamically turn on logging in response to a statement that has been running
     // far longer than expected.
     void startJitLogging(const char* checkPointName, Int64 elapsedSeconds);


     static void setPerformISForMC(NABoolean x)
     {
       performISForMC_ = x;
     }
     static NABoolean performISForMC()
     {
       return performISForMC_;
     }

                                               /*==============================*/
                                               /*     OBJECT INFORMATION       */
                                               /*==============================*/
     HSTableDef    *objDef;                         /* object definition       */
     NAString      *catSch;                         /* catalog+schema name     */
     NAString      *user_table;                     /* object name             */
     NABoolean     isHbaseTable;                    /* ustat on HBase table    */
     NABoolean     isHiveTable;                     /* ustat on Hive table     */
     NABoolean     hasOversizedColumns;             /* set to TRUE for tables  */
                                                    /* having gigantic columns */
     ComAnsiNameSpace nameSpace;                    /* object namespace    ++MV*/
     Int64          numPartitions;                  /* # of partns in object   */
     NAString      *hstogram_table;                 /* HISTOGRM table          */
     NAString      *hsintval_table;                 /* HISTINTS table          */
     NAString      *hsperssamp_table;               /* PERSISTENT_SAMPLES table */
     NAString      *hssample_table;                 /* SAMPLING table          */
     NABoolean      externalSampleTable;            /* ownership of sample tab */
     hs_table_type  tableType;                      /* GUARDIAN | ANSI format  */
     ComDiskFileFormat tableFormat;                 /* SQL/MP | SQL/MX table   */

                                               /*==============================*/
                                               /*    HISTOGRAM INFORMATION     */
                                               /*==============================*/
     NAString      *statstime;                      /* time of execution       */
     ULng32  statsTimeInt;                   /* time of execution       */
     Int64          actualRowCount;                 /* actual #rows            */
     Int64          sampleRowCount;                 /* sampled #rows           */
     Int64          rowChangeCount;                 /* rows IUD since last reset */
     HSColGroupStruct *dupGroup;                    /* list of duplicate hists */
     Int64          minRowCtPerPartition_;          /* minimal rows per partition */

                                               /*==============================*/
                                               /*  SYNTAX OPTION INFORMATION   */
                                               /*==============================*/
     Lng32           optFlags;                       /* syntax option flags     */
     Lng32        intCount;                       /* #intervals              */
     Int64          sampleValue1;                   /* sample option: value1   */
     Int64          sampleValue2;                   /* sample option: value2   */
     double         sampleTblPercent;               /* the sample % to use     */
     NABoolean      sampleOptionUsed;               /* SAMPLE specified        */
     NAString      *sampleOption;                   /* SAMPLE option used      */
     NABoolean      sampleTableUsed;                /* sample table created    */
     NABoolean      samplingUsed;                   /* sample (w/wo sample tbl)*/
     NABoolean      unpartitionedSample;            /* sample tbl not partitned*/
     NABoolean      isUpdatestatsStmt;              /* is update stats command */
     Lng32           groupCount;                     /* total #column groups    */
     Lng32           singleGroupCount;               /* #single-column groups   */
     HSColGroupStruct *singleGroup;                 /* single-column group list*/
     HSColGroupStruct *multiGroup;                  /* multi-column group list */

                                               /*==============================*/
                                               /*  ERROR HANDLING INFORMATION  */
                                               /*==============================*/
     Lng32           parserError;                    /* SYNTAX | SEMANTIC       */
     Lng32           errorCount;                     /* total #errors found     */
     NAString       errorFile;                      /* file in error           */
     Lng32           errorLine;                      /* file location of error  */
     ComDiagsArea  &diagsArea;                      /* diagnostic area         */

                                               /*==============================*/
                                               /*  AUTOMATION INFORMATION      */
                                               /*==============================*/
     static THREAD_P COM_VERSION    schemaVersion;           /* metadata version                                */
     static THREAD_P Lng32           autoInterval;            /* automation interval.  If 0, it is disabled.     */
     Int64                 sampleSeconds;           /* time to create sample table. 0 if no sampling   */
     Int64                 columnSeconds;           /* average time to read a column into memory       */
                                                    /* for internal sort                               */
     short                 samplePercentX100;       /* sampling percent to create sample table * 100   */
     NABoolean             allMissingStats;         /* TRUE if all hists to create are missing stats.  */

                                               /*==============================*/
                                               /*  OTHER INFORMATION           */
                                               /*==============================*/
     NABoolean             requestedByCompiler;     /* TRUE if ustats called by compiler. */

     double                sampleRateAsPercetageForIUS; /* sample rate in percentage
                                                           for one instance of persistent
                                                           sample table */

     NABoolean            sample_I_generated;

     Lng32          maxCharColumnLengthInBytes;   /* the value of USTAT_MAX_CHAR_COL_LENGTH_IN_BYTES */

     // Error recovery flags so we can reset CQDs that we set
     // during CollectStatistics() (We do this because the
     // HSHandleError macro commonly used makes it hard to
     // do the resets reliably in CollectStatistics itself. Sigh.)

     NABoolean hbaseCacheSizeCQDsSet_;
     NABoolean hiveMaxStringLengthCQDSet_;

 private:
     //++ MV
     // Special parser flags (see constructor and destructor).
     enum { dmALLOW_SPECIALTABLETYPE  = 0x1, dmALLOW_PHONYCHARACTERS = 0x2, dmINTERNAL_QUERY_FROM_EXEUTIL = 0x20000};
     // NOTE(review): presumably holds the parser flags that were in effect
     // before the constructor changed them, so the destructor can restore
     // them -- confirm against the constructor/destructor.
     ULng32               savedParserFlags;

     // Generates unique histogram IDs for all groups.
     Lng32 MakeAllHistid();

     // Builds group list from HISTOGRAMS table.
     //   groupList - receives the head of the list that is built.
     //   skipEmpty - NOTE(review): presumably skips empty histograms; confirm.
     //   exclusive - TRUE if exclusive locks are needed on the accessed rows.
     Lng32 groupListFromTable(HSColGroupStruct*& groupList,
                              NABoolean skipEmpty=FALSE,
                              NABoolean exclusive=FALSE); // do we need exclusive locks on the accessed rows

     // Computes Multi-Column statistics, based on Single-Column statistics.
     // If usingIS is TRUE, internal sort (IS) is tried for computing the MCs.
     Lng32 ComputeMCStatistics(NABoolean usingIS=FALSE /* try using IS to compute MCs */);

     // Calculates final ROWCOUNT and UEC to account for sampling.
     Lng32 FixSamplingCounts(HSColGroupStruct *group);

     // Clears all histograms based on object_uid.
     Lng32 ClearAllHistograms();

     // Clears selected histograms based on object_uid and hist_id.
     Lng32 ClearSelectHistograms();

     // Deletes all orphan histograms for SQL/MP tables.
     Lng32 DeleteOrphanHistograms();

     // Inserts new statistics and deletes old statistics.
     Lng32 WriteStatistics();

     // Gathers and creates the output string for generated histograms.
     Lng32 DisplayHistograms(NAString& displayData, Space& space,const ULng32 oldHistId, const char* colnames);

     // Internal sort functions.
     //
     // When performing internal sort, determines the amount of memory required
     // for each column that will be read into memory.
     Int64 getInternalSortMemoryRequirements(NABoolean performISForMC);

     // Get maximum amount of memory to use for internal sort.
     Int64 getMaxMemory();

     // Re-order multi-column and single-column groups to maximize the number
     // of multi-column group stats that can be done in memory.
     NABoolean orderMCGroupsNeeded();
     void orderMCGroups (HSColGroupStruct* s_group_back[]);

     // Helper functions for orderMCGroups.
     void computeMCGroupsWeight();
     void computeSingleUsedCols();
     void reorderMCGroupsByWeight();
     void formGroupSets();
     void reorderSingleGroupsByWeight (HSColGroupStruct* s_group_back[], Int32 colsOrder[], Int32 &headGroupCols);
     void freeMCISmemory(HSColGroupStruct* s_group_back[], Int32 colsOrder[], Int32 &headGroupCols);
     void reArrangeMCGroups();

     // Select a set of columns that will fit in available memory so they can
     // be sorted internally.
     Int32 selectSortBatch(Int64 rows, NABoolean ISonlyWhenBetter,
                         NABoolean trySampleInMemory);

     // Select a set of columns that can be IUS updated in memory in one batch.
     // 'currentRows' is the number of rows currently in the sample table,
     // 'futureRows' is the number of rows to be populated in the sample table
     // after IUS, 'ranOut' is set to TRUE when there is not enough memory to
     // perform any IUS, and 'colsSelected' indicates the number of columns
     // selected for IUS in this batch.
     Lng32 selectIUSBatch(Int64 currentRows, Int64 futureRows,NABoolean& ranOut, Int32& colsSelected);

     // Determine if all groups (both single and MC) can fit in memory for internal sort.
     // No space is actually allocated and no state is set for each group.
     NABoolean allGroupsFitInMemory(Int64 rows);

     // Determine the next batch of columns to be processed with internal sort
     // by calling selectSortBatch() and ensuring that adequate memory can be
     // allocated for those columns.
     Int32 getColsToProcess(Int64 rows,
                          NABoolean internalSortWhenBetter,
                          NABoolean trySampleTableBypass = FALSE);

     // If we decide to create and load a sample table, deallocate column memory
     // and reset PENDING group states back to UNPROCESSED before creating and
     // loading the sample table. getColsToProcess() is called afterwards to
     // allocate the memory again.
     void deallocatePendingMemory(void);

     // After an allocation failure, this is called to reduce the amount of
     // memory we estimate is available.
     static void memReduceAllowance();

     // When a memory allocation fails, return any memory already allocated for
     // the group for internal sort, and set any PENDING columns back to
     // UNPROCESSED state.  This function cannot fail.
     static void memRecover(HSColGroupStruct* group, NABoolean firstFailed, Int64 rows,
                            HSColGroupStruct* mgroup);

     // Allocate memory for the columns selected for an internal sort batch.
     // (The commented-out declaration below is an earlier name kept for
     // reference.)
     //Int32 allocateMemoryForColumns(Int64 rows);
     Int32 allocateMemoryForInternalSortColumns(Int64 rows);

     // NOTE(review): presumably sets up 'cursor' and per-column state for a
     // subsequent readColumnsIntoMem() call -- confirm in the implementation.
     Lng32 prepareToReadColumnsIntoMem(HSCursor *cursor, Int64 rows);

     // Reads all values for selected columns into memory, where they can be
     // sorted and then grouped into intervals.
     Lng32 readColumnsIntoMem(HSCursor *cursor, Int64 maxRows);

     // Iterates through group list for single columns, and calls sorting
     // routine for each column marked as PENDING.
     Lng32 sortByColInMem();

     // Creates histograms for columns once they are sorted.
     Lng32 createStats(Int64 rowsAllocated);

     // Creates histograms for the columns specified in group.
     Lng32 createStatsForColumn(HSColGroupStruct* group, Int64 rowsAllocated);


     // Collect statistics by incrementally updating persistent sample table and
     // possibly histograms as well.
     Lng32 doIUS(NABoolean& done);

     // Collect stats by incrementally updating histograms where possible. Persistent
     // sample is also incrementally updated.
     Lng32 doFullIUS(Int64 currentSampleSize, Int64 futureSampleSize, NABoolean& done);

     // Causes persistent sample table to be incrementally updated, and performs
     // other preparatory tasks so RUS can be performed using persistent sample.
     Lng32 prepareToUsePersistentSample (Int64 currentSampleSize, Int64 futureSampleSize);

     // Incrementally update histograms for a selected batch of columns.
     Lng32 CollectStatisticsForIUS(Int64 currentSampleSize, Int64 futureSampleSize);

     //
     // Prepare for IUS. This method implements the 1st algorithm, which
     // does not require persistent CBFs. It performs the following:
     // 1. Check the existence of the persistent sample table S
     // 2. Update the sample table with S-D and S-D+I
     // 3. Optionally trim the final sample table to the same size as before.
     // NOTE(review): this description appears to belong to
     // prepareForIUSAlgorithm1() declared below rather than to the two
     // declarations that immediately follow it -- confirm.
     Lng32 computeSampleSizeForIUS(Int64& currentSampleSize, Int64& futureSampleSize);
     void setMemoryRequirementForIUS(HSColGroupStruct *group, Int64 futureSampleSize);

     Lng32 prepareForIUSAlgorithm1(Int64& rows /* # of rows in the sample table */);

     // Generate the incremental sample (aka sample set I).
     Lng32 generateSampleI(Int64 currentSampleSize, Int64 futureSampleSize);

     Lng32 moreColsForIUS();

     // Use in-memory tables to update histograms incrementally.
     Lng32 incrementHistograms();

     // NOTE(review): group/delGroup/insGroup presumably refer to the same
     // column in the sample, deleted-rows and inserted-rows data sets
     // respectively -- confirm in the implementation.
     Lng32 initIUSIntervals(HSColGroupStruct* group,
                            HSColGroupStruct* delGroup,
                            HSColGroupStruct* insGroup,
                            UInt32 histID,
                            Int16 numIntervals);

     Int32 processIUSColumn(HSColGroupStruct* smplGroup,
                            HSColGroupStruct* delGroup,
                            HSColGroupStruct* insGroup);


     NABoolean statsNeeded_;    /* statistics are needed   */
     UstatContextID contID_;    /* context ID              */
     static THREAD_P float ISMemPercentage_;    /* % of available physical memory to use for internal sort */
     NABoolean currentRowCountIsEstimate_;          /* Row count est flag      */

     // In-memory tables used by IUS. (The commented-out member is retained
     // from an earlier version.)
     // NOTE(review): judging by the names, these hold the rows deleted from
     // and inserted into the sample -- confirm.
     //HSInMemoryTable* iusSampleInMem;
     HSInMemoryTable* iusSampleDeletedInMem;
     HSInMemoryTable* iusSampleInsertedInMem;

     // Used by IUS code for cleanup purposes.
     NABoolean sampleIExists_;

     // For IUS, once the persistent sample table has been successfully updated
     // in accordance with the IUS predicate, these ptrs will point to the requested
     // (expected) and actual number of rows in the sample table. end_IUS_work will
     // pass these ptrs to the function that updates the sample table's row in
     // SB_PERSISTENT_SAMPLES. If non-null, the values are used for the corresponding
     // columns in that table.
     Int64* PST_IUSrequestedSampleRows_;
     Int64* PST_IUSactualSampleRows_;

     // Type-specific worker for processIUSColumn().
     // NOTE(review): 'ptr' appears to serve only to select the template type T
     // (compare the (T*)NULL argument passed to convertToISdatatype()), and
     // 'format' looks like the na_swscanf() format for T -- confirm.
     template <class T>
     Int32 processIUSColumn(T* ptr,
                            const NAWchar* format,
                            HSColGroupStruct* smplGroup,
                            HSColGroupStruct* delGroup,
                            HSColGroupStruct* insGroup);

     // This function is used by convertBoundaryOrMFVValue() for types that can't
     // be handled by a simple call to na_swscanf(). The first (unnamed) pointer
     // parameter is not given a name; callers pass (T*)NULL, so it serves to
     // select T.
     template <class T>
     T convertToISdatatype(T*,
                           const HSDataBuffer& valToConvert,
                           HSColGroupStruct* group);

     // Template for converting the value in an HSDataBuffer (used for interval
     // boundary and MFV values) to any non-char type. The converted value goes
     // in element 'index' of the array 'convertedValues'.
     template <class T>
     void convertBoundaryOrMFVValue(const HSDataBuffer& valToConvert,
                                    HSColGroupStruct* group,
                                    Int32 index,
                                    T* convertedValues,
                                    const NAWchar* format)
     {
       // Can just use na_swscanf() unless the column's in-memory type was mapped
       // from its original type, or is a fixed numeric with nonzero scale.
       Int32 actualDatatype = group->colSet[0].datatype;
       if (group->ISdatatype != actualDatatype ||
           (actualDatatype >= REC_MIN_BINARY && actualDatatype <= REC_MAX_BINARY
                                             && group->colSet[0].scale > 0))
         convertedValues[index] = convertToISdatatype((T*)NULL, valToConvert, group);
       else
         na_swscanf((const NAWchar*)valToConvert.data(), format, convertedValues+index);
     }

     // Template specialization for converting value in an HSDataBuffer to an
     // instance of IUSFixedChar.
     void convertBoundaryOrMFVValue(const HSDataBuffer& valToConvert,
                                    HSColGroupStruct* group,
                                    Int32 index,
                                    IUSFixedChar* convertedValues,
                                    const NAWchar* format)
     {
       convertedValues[index] = valToConvert;
     }

     // Template specialization for converting value in an HSDataBuffer to an
     // instance of IUSVarChar.
     void convertBoundaryOrMFVValue(const HSDataBuffer& valToConvert,
                                    HSColGroupStruct* group,
                                    Int32 index,
                                    IUSVarChar* convertedValues,
                                    const NAWchar* format)
     {
       convertedValues[index] = valToConvert;
     }

     // NOTE(review): judging by the name, returns the average character length
     // for the column across the given groups -- confirm in the implementation.
     double computeAvgCharLengthForIUS(HSColGroupStruct* group,
                                       HSColGroupStruct* delGroup,
                                       HSColGroupStruct* insGroup);

     // NOTE(review): the parameter roles below are inferred from their names
     // only (histogram being updated, counting Bloom filter, number of
     // non-null intervals, scale factor, null count, per-interval row counts)
     // -- confirm in the implementation.
     Int32 estimateAndTestIUSStats(HSColGroupStruct* smplGroup,
                                   HSColGroupStruct* delGroup,
                                   HSColGroupStruct* insGroup,
                                   HSHistogram* hist,
                                   CountingBloomFilter* cbf,
                                   Lng32 numNonNullIntervals,
                                   double scaleFactor,
                                   Int32 nullCount,
                                   Int64* intvlRC);

     // Three overloads of mergeDatasetsForIUS(): a no-argument form, a form
     // taking the sample/deleted/inserted groups with their row counts, and a
     // template form whose leading pointer parameters carry the types T_IUS
     // and T_IS.
     // NOTE(review): presumably each form dispatches to the next -- confirm.
     Lng32 mergeDatasetsForIUS();

     Lng32 mergeDatasetsForIUS(
                        HSColGroupStruct* smplGroup, Int64 smplrows,
                        HSColGroupStruct* delGroup, Int64 delrows,
                        HSColGroupStruct* insGroup, Int64 insrows);

     template <class T_IUS, class T_IS>
     Int32 mergeDatasetsForIUS(T_IUS* ptr, T_IS* dummyPtr,
                        HSColGroupStruct* smplGroup, Int64 smplrows,
                        HSColGroupStruct* delGroup, Int64 delrows,
                        HSColGroupStruct* insGroup, Int64 insrows);

     // Tracks the lowest and highest values passed to findHiLowValues().
     // T must support assignment and the < and > operators.
     template <class T>
     class HSHiLowValues
       {
         public:

           // FALSE until findHiLowValues() has been called at least once.
           NABoolean seenAtLeastOneValue_;
           // Extremes observed so far; meaningful only when
           // seenAtLeastOneValue_ is TRUE.
           T hiValue_;
           T lowValue_;

           HSHiLowValues() : seenAtLeastOneValue_(FALSE) { }

           // Folds 'val' into the running low/high values.
           void findHiLowValues(T& val)
             {
               if (!seenAtLeastOneValue_)
                 {
                   // The first value seen is both the lowest and the highest.
                   seenAtLeastOneValue_ = TRUE;
                   lowValue_ = val;
                   hiValue_ = val;
                   return;
                 }

               if (val < lowValue_)
                 lowValue_ = val;
               else if (val > hiValue_)
                 hiValue_ = val;
             }
       };

     // Binary search for the interval that 'val' belongs to, using the upper
     // boundaries in boundaries[1..numInt]. Returns the lowest-numbered
     // interval in the final search window whose boundary is >= val, or the
     // window's upper end (numInt when val exceeds every boundary).
     // Assumes the boundaries are in ascending order and numInt >= 1. With a
     // single interval the loop is skipped and interval 1 is returned.
     template <class T>
     Int16 findInterval(Int16 numInt, T* boundaries, T& val)
       {
         Int16 lo = 1;
         Int16 hi = numInt;

         // Narrow the window [lo, hi] until it holds at most two candidates.
         while (hi - lo > 1)
           {
             const Int16 mid = lo + ((hi - lo) / 2);
             if (val <= boundaries[mid])
               hi = mid;
             else
               lo = mid;
           }

         return (val <= boundaries[lo]) ? lo : hi;
       }

     // NOTE(review): presumably writes information about 'cbf' to the log,
     // with the first argument used as a title or label -- confirm.
     Int32 logCBF(const char*, CountingBloomFilter* cbf);

     // Hash table mapping table names to the elapsed time thresholds for
     // activating just-in-time logging. This is used to capture log info for
     // Ustat statements running long past their expected execution time.
     // The hash table is a static member so we can set it up once and reuse
     // it for any subsequent ustat stmt.
     static THREAD_P JitLogHashType* jitLogThresholdHash;
     // NOTE(review): the three members below presumably hold the threshold
     // that applies to the current statement, the statement's start time, and
     // whether just-in-time logging has been activated -- confirm.
     double jitLogThreshold;
     Int64 stmtStartTime;
     NABoolean jitLogOn;

     // For IUS, was the SB_PERSISTENT_SAMPLES row for the source table updated?
     // The change is undone by the HSGlobalsClass dtor, so we need to account for
     // the possibility that an IUS statement failed prior to making the change.
     // Otherwise, a concurrent IUS operation could have its changes to the row
     // overwritten.
     NABoolean PSRowUpdated;

     // NOTE(review): presumably TRUE when internal sort is to be used for
     // multi-column groups (compare getInternalSortMemoryRequirements()) --
     // confirm.
     static THREAD_P NABoolean performISForMC_;

   };  // class HSGlobalsClass

 // -----------------------------------------------------------------------
 // Column descriptor to store column info returned from CLI.
 // -----------------------------------------------------------------------
 struct HSColDesc : public NABasicObject
 {
   Lng32 datatype;
   Lng32 length;
   Lng32 precision;
   Lng32 scale;
   Lng32 nullflag;

   Lng32 dataOffset;
   Lng32 indDataOffset;
   char *data;
   char *indData;

   Lng32 groupNum;
   NABoolean isSingleColGroup;

   HSColDesc()
     : data(NULL), indData(NULL),
       isSingleColGroup(FALSE)
   {}

   inline NABoolean isNull(const char *dataBuf) const
   {
     return (nullflag &&
            (dataBuf[indDataOffset] == (char)0xFF));
   }

   inline NABoolean isNull() const
   {
     return (nullflag &&
            (*indData == (char)0xFF));
   }

   // only if datatype == REC_BYTE_V_ASCII.
   inline Int32 varcharLen(const char *dataBuf) const
   {
     short len;
     memcpy((char *)&len, &dataBuf[dataOffset], VARCHAR_LEN_FIELD_IN_BYTES);
     return (Int32)len;
   }

   inline Int32 varcharLen() const
   {
     short len;
     memcpy((char *)&len, data, VARCHAR_LEN_FIELD_IN_BYTES);
     return (Int32)len;
   }

   inline void rebase(const Lng32 base)
   {
     dataOffset -= base;
     indDataOffset -= base;
   }
 };

 // Constants used by FrequencyCounts: the size of the
 // hash table, a prime number, and the number of f_i
 // values stored explicitly in a dense array. Can't use
 // static const ints for these, because they are used as
 // bounds in array declarations.
 // NOTE(review): standard C++ accepts integral constant expressions as array
 // bounds, so the macros are presumably kept for an older toolchain --
 // confirm before replacing them.
 #define FC_NUM_HT_BUCKETS 389
 #define FC_NUM_STORED_VALUES 1024

 //
 // Class to maintain frequency counts (f_i) of a set of
 // values, used for estimating UECs from a sample.  f_1
 // is the number of values that occur exactly one time in
 // a sample, f_2 the number of values that occur exactly 2
 // times, and so on.
 // Note: Normally, this class would be a 'public NABasicObject'.
 //       However, we need an array of these objects on the heap
 //       and the following do not work when it is an NABasicObject:
 //         - FrequencyCounts *arr = new FrequencyCounts[x];
 //           delete [] arr;
 //         - FrequencyCounts *arr = new (STMTHEAP) FrequencyCounts[x];
 //           NADELETEARRAY(arr, x, FrequencyCounts, STMTHEAP);
 //
 //       With the form it is, we can use the standard C++ method
 //       of alloc/dealloc (the MX STMTHEAP method does not work).
 //
 class FrequencyCounts
 {
  public:
   FrequencyCounts();
   ~FrequencyCounts();

   // Copy assignment is used when an interval is copied while removing
   // undersized gap intervals.
   FrequencyCounts& operator=(const FrequencyCounts& rhs);

   // Reset all the frequency counts to 0.
   void reset();

   // Increment f_i by the value specified (default 1).
   void increment(Int64 i, ULng32 val=1);

   // Return f_i.
   ULng32 operator[](Int64 i);

   // Merge the frequency counts of this object into the specified object
   // (i.e., f).
   void mergeTo(FrequencyCounts &f);

  private:
   // Copy constructor is declared private and left undefined, so objects of
   // this class cannot be copy-constructed.
   FrequencyCounts(const FrequencyCounts& other);

   // For i in the range 1..(FC_NUM_STORED_VALUES-1), f_i values are
   // stored in array fiArr_; the value of f_i is fiArr_[i].
   // For i >= FC_NUM_STORED_VALUES, nonzero i and f_i values are
   // stored in hash table bigfiHT_.
   //
   // Hash table entry: ix_ is the index i, value_ is f_i, and next_ links
   // entries together.
   // NOTE(review): next_ presumably chains entries that hash to the same
   // bucket -- confirm in the implementation.
   struct entry
   {
     ULng32 ix_;
     ULng32 value_;
     struct entry *next_;
   };

   // Helper methods for the hash table part (bigfiHT_).
   // NOTE(review): judging by the names, these reset the table, add 'val'
   // to the count stored for 'ix', look up the count for 'ix', create a new
   // entry, and map 'ix' to its bucket -- confirm in the implementation.
   void resetHT();
   void incrementHT(ULng32 ix, ULng32 val);
   ULng32 lookupHT(ULng32 ix);
   struct entry *newEntry(ULng32 ix, ULng32 value);
   struct entry *hashToBucket(ULng32 ix);

   // Array of f_i values and hash table.
   ULng32 fiArr_[FC_NUM_STORED_VALUES];
   struct entry bigfiHT_[FC_NUM_HT_BUCKETS];
 };

 // Data kept for a single histogram interval: row and UEC counts, the
 // interval boundary, most-frequent-value information, and gap/high-frequency
 // markers. All members are public.
 class HSInterval
   {
 public:
     HSInterval();
     ~HSInterval();

     Int64 rowCount_;
     Int64 uecCount_;
     HSDataBuffer boundary_;
     Int64 MFVrowCount_; // stores Most Frequent Value frequency (rowcount)
     Int64 MFV2rowCount_; // second Most Frequent Value frequency (rowcount)
     HSDataBuffer mostFreqVal_; // stores Most Frequent Value
     double gapMagnitude_;  // leave as 0 for non-gap intervals
     NABoolean highFreq_;   // if TRUE, an interval for a high-frequency value
     double squareCntSum_;  // the summation of the square of all value counts
                            // squareCntSum_ is used to calculate skew for
                            // sampling UEC estimation and std dev of freq.
     Int64 origUec_; // to save original interval UEC, needed to compute stdev
     Int64 origRC_;  // to save original interval RC, needed to scale MFV properly
     Int64 origMFV_;  // to save original interval MFV, needed to scale MFV properly
   };

 // The GapKeeper class tracks the n largest gaps as they are discovered.
 // It maintains a sorted array of gap magnitudes, and provides a function
 // to insert a new gap if it is among the largest.
 class GapKeeper
 {
   public:
     // 'gapsToKeep' is the number of largest gaps to track.
     GapKeeper(Int32 gapsToKeep);
     ~GapKeeper();
     // Offers a newly discovered gap magnitude to the keeper.
     // NOTE(review): presumably returns TRUE if the gap was among the largest
     // and was inserted -- confirm in the implementation.
     NABoolean insert(double gap);
     // NOTE(review): presumably returns the smallest gap magnitude currently
     // kept -- confirm in the implementation.
     double smallest();
     // NOTE(review): presumably returns the number of kept gaps that are at
     // least 'minAcceptableGap' -- confirm in the implementation.
     Int32 qualifyingGaps(double minAcceptableGap);

   private:
     // Copy ctor and assignment not used (declared private, so copying is
     // not available to clients).
     GapKeeper(const GapKeeper&);
     GapKeeper& operator=(const GapKeeper&);

     Int32 gapsToKeep_;   // number of gaps to track
     double *gaps_;       // sorted array of gap magnitudes
 };


 class HSHistogram : public NABasicObject
   {
 public:
     HSHistogram(Lng32 intcount, Int64 rowcount, Lng32 gapIntervals, Lng32 highFreqIntervals,
                 NABoolean sampleUsed = FALSE,
                 NABoolean singleIntervalPerUec = FALSE);
     ~HSHistogram();

     void deleteFiArray();
     Lng32 processIntervalValues(boundarySet<myVarChar>* boundaryRowSet,
                                HSColGroupStruct* group,
                                Int64 &rowsInSet,
                                double currentGapAvg);

     Lng32 updateMCInterval(const HSDataBuffer &lowval,
                           const HSDataBuffer &hival);

     void addNullInterval(const Int64 nullCount, const Lng32 colCount);
     // The value returned by getNumIntervals does not include the 0th interval,
     // which is used only to store the minimum value.
     inline Lng32 getNumIntervals() const {return currentInt_;}
     inline NABoolean hasNullInterval() const {return hasNull_;}
     void getOrigTotalCounts(Int64 &rowCount, Int64 &uecCount);
     void getTotalCounts(Int64 &rowCount, Int64 &uecCount);
     Int64 getTotalUec();
     Int64 getTotalRowCount();
     Lng32 getLowValue(HSDataBuffer &lval, NABoolean addParen=TRUE);
     Lng32 getHighValue(HSDataBuffer &hval, NABoolean addParen=TRUE);
     Int64 getHighFreqThreshold()
       { return highFreqThreshold_; }

     inline Int64 getIntRowCount(const Lng32 intNum) const {return intArry_[intNum].rowCount_;}
     inline Int64 getIntUec(const Lng32 intNum) const {return intArry_[intNum].uecCount_;}
     inline double getIntSquareSum(const Lng32 intNum) const {return intArry_[intNum].squareCntSum_;}
     inline Int64 getIntOrigUec(const Lng32 intNum) const {return intArry_[intNum].origUec_;}
     inline Int64 getIntOrigRC(const Lng32 intNum) const {return intArry_[intNum].origRC_;}
     inline Int64 getIntMFVRowCount(const Lng32 intNum) const {return intArry_[intNum].MFVrowCount_;}
     inline Int64 getIntMFV2RowCount(const Lng32 intNum) const {return intArry_[intNum].MFV2rowCount_;}
     inline Int64 getIntOrigMFV(const Lng32 intNum) const {return intArry_[intNum].origMFV_;}

     void setIntRowCount(const Lng32 intNum, const Int64 value) { intArry_[intNum].rowCount_ = value; }
     void addIntRowCount(const Lng32 intNum, const Int64 value) { intArry_[intNum].rowCount_ += value; }
     void setIntOrigUec(const Lng32 intNum, const Int64 value) { intArry_[intNum].origUec_ = value; }
     void setIntOrigRC(const Lng32 intNum, const Int64 value) { intArry_[intNum].origRC_ = value; }
     void setIntMFVRowCount(const Lng32 intNum, const Int64 value) { intArry_[intNum].MFVrowCount_ = value; }
     void setIntMFV2RowCount(const Lng32 intNum, const Int64 value) { intArry_[intNum].MFV2rowCount_ = value; }

     void setIntOrigMFV(const Lng32 intNum, const Int64 value) { intArry_[intNum].origMFV_= value; }

     void setIntUec(const Lng32 intNum, const Int64 value) { intArry_[intNum].uecCount_ = value; }
     Lng32 getParenthesizedIntBoundary(Lng32 intNum, HSDataBuffer &intBoundary);
     const HSDataBuffer& getIntBoundary(Lng32 intNum) { return intArry_[intNum].boundary_; }
     const HSDataBuffer& getIntMFV(Lng32 intNum) { return intArry_[intNum].mostFreqVal_; }
     Lng32 getParenthesizedIntMFV(Lng32 intNum, HSDataBuffer &mostFreqVal);
     FrequencyCounts *fi(const ULng32 intNum)
       { return fi_ ? &(fi_[intNum]) : 0; }
     void removeLesserGapIntervals(double trueGapAvg);
     double getGapMultiplier()
       { return gapMultiplier_; }
     GapKeeper gapKeeper_;

     // Used by IUS when reading existing histograms from metadata. currentInt_ is
     // the number of intervals actually used (intCount_ is the number available).
     void setCurrentInt(const Lng32 numInts) { currentInt_ = numInts; }
     void setHasNull(NABoolean val) { hasNull_ = val; }
     void setIntBoundary(const Lng32 intNum, const char* value, Int16 len)
       { intArry_[intNum].boundary_.copyFrom(value, len, TRUE); }
     void setIntBoundary(const Lng32 intNum, const HSDataBuffer & newBoundary)
       { intArry_[intNum].boundary_ = newBoundary; }
     void setIntMFVValue(const Lng32 intNum, const char* value, Int16 len)
       { intArry_[intNum].mostFreqVal_.copyFrom(value, len, TRUE); }

     void adjustMFVand2MFV(const Lng32 i, double newEstRow, double newEstUec);
     void setIntSquareSum(const Lng32 intNum, double sum) {intArry_[intNum].squareCntSum_ = sum;}

     void maintainEndIntervalForIUS(float avgRCPerInterval, Lng32 intNum);

     void setMaxStddev(double x) { maxStddev_ = x; };
     double getMaxStddev() { return maxStddev_ ; };

     void logIntervals(Lng32 curr = -1, Lng32 lookahead = -1);
     void logAll(const char* title);


 private:
     // Copy ctor and assignment not used.
     HSHistogram(const HSHistogram&);
     HSHistogram& operator=(const HSHistogram&);

     Lng32 mergeInterval(const Lng32 intervalToMerge,
                        const Lng32 prevInterval,
                        const double gapThreshold);
     void mergeMFVs(const Lng32 to, const Lng32 from);

     Lng32            intCount_;       // # of intervals that can be used
     Lng32            maxAllowedInts_; // the total allocated intervals (allows for extras
                                      // during gap/freq encoding).
     Lng32            currentInt_;     // current interval
     Int64           remRows_;        // remainder rows to spread accross intervals
     Int64           step_;           // MAX data points per interval
     Int64           originalStep_;   // unlike step_, not adjusted after each interval
     HSInterval     *intArry_;        // interval array
     NABoolean       hasNull_;        // NULL bounddary is used
     FrequencyCounts *fi_;            // frequency counts (per interval)
     double          gapMultiplier_;  // Gap avg. times this is "big gap" threshold
     Lng32            gapIntCount_;          // # gap intervals created; not all will be kept
     Lng32            targetGapIntervals_;   // keep this many gap intervals
     Lng32            highFreqIntervalsAllotted_; // # added for high freq values; don't include
                                                 //    when calculating step size
     Lng32            highFreqIntervalsUsed_;// # of allotted high frequency intvls actually used
     Int64           highFreqThreshold_;    // row count for a single value beyond which
                                            //   a separate interval is formed
     NABoolean       singleIntervalPerUec_; // flag indicates if this histogram
                                            // will be a 'single interval per
                                            // uec' histogram

     double          maxStddev_;
 public:
   // Have to define this function within class definition since it uses a
   // template (Microsoft compiler gives error C2660 when it is invoked if
   // defined in a separate file).
   /***********************************************/
   /* METHOD:  addIntervalData()                  */
   /* PURPOSE: Add the passed value and its row   */
   /*          count to the current interval, or  */
   /*          to a new one if the row count is   */
   /*          too big to fit in the current one. */
   /* PARAMS:  value      - the unique value.     */
   /*          group      - used to construct     */
   /*                       external format string*/
   /*          numRows    - the number of entries */
   /*                       equal to 'value'      */
   /*          bigGap     - if true, indicates a  */
   /*                       gap of sufficient     */
   /*                       size to create an     */
   /*                       interval for it.      */
   /*          gapMagnitude - Size of the gap that*/
   /*                       precedes this value.  */
   /*          final      - indicates that this is*/
   /*                       (or may be, if using  */
   /*                       of query sort/group,  */
   /*                       which reads a rowset  */
   /*                       at a time) the last   */
   /*                       unique value to be    */
   /*                       added.                */
   /* RETCODE:  0 - successful                    */
   /*          -1 - failure                       */
   /* ASSUMPTIONS: The data is SORTED(increasing) */
   /* NOTES:   bndry:   )[----](----]...(----]    */
   /*          int#     0   1     2  ...   n      */
   /***********************************************/
   template <class T>
   Lng32 addIntervalData(T& value,
                        const HSColGroupStruct *group,
                        const Int64 numRows,
                        NABoolean bigGap,
                        double gapMagnitude,
                        NABoolean final)
   {
     HSLogMan *LM = HSLogMan::Instance();
     Lng32 retcode = 0;
     HSDataBuffer result;
     static T lastValue, mostFreqVal;
     static Int64 MFVrows = 0, MFV2rows = 0;

     // Interval(0) is a special interval and we only need to format its
     // boundary, which serves as the minimum value of interval(1) (and hence
     // the whole histogram). Use the initial value to start off interval(1)
     // and return.
     //
     if (currentInt_ == 0)
       {
         setBufferValue(value, group, intArry_[0].boundary_);
         currentInt_++;
         intArry_[currentInt_].uecCount_ = 1;
         // Interval 1 can't be gap, but it may be a high frequency interval.
         if (numRows < step_ && numRows > highFreqThreshold_)
           {
             intArry_[currentInt_].highFreq_ = TRUE;
             highFreqIntervalsUsed_++;
             if (LM->LogNeeded())
               {
                 sprintf(LM->msg,
                         "Interval 1 used as high frequency interval with " PF64 " rows",
                         numRows);
                 LM->Log(LM->msg);
               }
           }
       }

     // Start a new interval if the current value's rowcount would overflow the
     // current interval, or if this value or the last was the single value included
     // in a gap interval. Otherwise, add the rowcount to the current interval.
     //
     else if (currentInt_ < intCount_ &&
           (intArry_[currentInt_].rowCount_ + numRows > step_ ||  // bucket overflow
            numRows > highFreqThreshold_ ||  // next intvl will be for high freq
            bigGap ||                        // next intvl will be for for gap
            intArry_[currentInt_].gapMagnitude_ > 0 || // current intvl is for gap
            intArry_[currentInt_].highFreq_))          // current intvl is for high freq
       {
         // Complete information for interval and start new one:
         // Save boundary and most frequent values.
         setBufferValue(lastValue, group, intArry_[currentInt_].boundary_);
         setBufferValue(mostFreqVal, group, intArry_[currentInt_].mostFreqVal_);
         intArry_[currentInt_].MFVrowCount_  = MFVrows;
         intArry_[currentInt_].MFV2rowCount_ = MFV2rows;
         MFVrows = MFV2rows = 0;  // Clear these for next interval;

         currentInt_++;
         intArry_[currentInt_].uecCount_ = 1;
         // If the current value is the high end of a big gap, set a nonzero gap
         // value for the next interval. This will cause that interval to be
         // completed with only that value when this function is called with the
         // next value. If the interval contains a single uec with a row count >=
         // the target bucket height, don't mark it as a gap because we want to
         // keep it a separate interval and not merge it with an adjacent interval
         // if it turns out not to be one of the biggest gaps.
         if (numRows < step_)
           {
             if (numRows > highFreqThreshold_)
               {
                 intArry_[currentInt_].highFreq_ = TRUE;
                 highFreqIntervalsUsed_++;
                 if (LM->LogNeeded())
                   {
                     sprintf(LM->msg,
                             "Interval %d used as high frequency interval with " PF64 " rows",
                             currentInt_, numRows);
                     LM->Log(LM->msg);
                   }
               }
             else if (bigGap)
               {
                 intArry_[currentInt_].gapMagnitude_ = gapMagnitude;
                 gapIntCount_++;
               }
           }

         if (NOT singleIntervalPerUec_)
           {
             // Adjust the interval threshold (STEP_) by the remainder rows
             // and intervals. Update remRows by subtracting the row count of the
             // interval just completed. Subtract the number of unused intervals
             // that were designated for high frequency values and gaps before
             // dividing to find new step size. The gap count is imprecise because
             // some will be merged back.
             remRows_ = MAXOF(remRows_ - intArry_[currentInt_ - 1].rowCount_, 1);

             // If gaps are being processed, there may be a shortfall of
             // intervals due to undersized gap intervals, which will drive
             // the step size higher. Here we release reserve intervals if
             // necessary to try to keep the step size from exceeding its
             // original value by more than 10%.
             Int32 remainingIntervalsAvailable;

             // Increase # of intervals until current step less than
             // 110% of original (or we use up all available intervals).
             do {
                 remainingIntervalsAvailable =
                    (MAXOF(1,
                           intCount_
                             - (highFreqIntervalsAllotted_ - highFreqIntervalsUsed_)
                             - (MAXOF(0, (Lng32)((targetGapIntervals_ - gapIntCount_) * 1.5)))
                             - (currentInt_ - 1)));
                 intCount_++; // This is the only place intCount_ is increased.
                 step_ = remRows_ / remainingIntervalsAvailable;
               }
             while (intCount_ <= maxAllowedInts_ && step_ > 1.1 * originalStep_);

             intCount_--; // This was incremented one too many times.
           }
       }
     else
       intArry_[currentInt_].uecCount_++;

     double numRowsd                     = (double) numRows;
     intArry_[currentInt_].rowCount_     += numRows;
     intArry_[currentInt_].squareCntSum_ += numRowsd * numRowsd;
     if (fi_)
       fi_[currentInt_].increment(numRows);

     // Update most frequent values.
     if (numRows > MFVrows)
     {
       MFV2rows = MFVrows;
       MFVrows = numRows;
       mostFreqVal = value;
     }
     else if (numRows > MFV2rows)
       MFV2rows = numRows;

     // If this is the last distinct value, set it as interval boundary value
     // instead of waiting for a value that forces start of a new interval.
     // If not doing internal sort, final=true may just mean end of a rowset,
     // but we have to set the boundary value and mostFreqVal in case it is the
     // last rowset. If not, the actual final value of the interval will
     // overwrite it. This is why we save lastValue even if final is true.
     if (final)
     {
       setBufferValue(value, group, intArry_[currentInt_].boundary_);
       setBufferValue(mostFreqVal, group, intArry_[currentInt_].mostFreqVal_);
       intArry_[currentInt_].MFVrowCount_  = MFVrows;
       intArry_[currentInt_].MFV2rowCount_ = MFV2rows;
       MFVrows = MFV2rows = 0;  // Clear these for next interval/column;
     }
     lastValue = value;
     return retcode;
   }
 };


 // Holds the name, WHERE condition, row count, sample rate, and column list
 // for a table, together with a flag recording whether populate() has loaded
 // it. Also declares the query-text generators used by the two algorithms
 // noted below. Instances cannot be copied.
 class HSInMemoryTable : public NABasicObject
 {
   public:
     // Stores the arguments in the corresponding members, starts with no
     // column list and the populated flag cleared, then calls setUpColumns().
     //   tblName    - saved as tableName_
     //   condition  - saved as whereCondition_
     //   maxRows    - initial value of rows_
     //   sampleRate - saved as sampleRate_ (defaults to 0)
     HSInMemoryTable(NAString& tblName, NAString& condition,
                     Int64 maxRows, double sampleRate = 0)
       : tableName_(tblName),
         whereCondition_(condition),
         rows_(maxRows),  // replaced in populate() w/actual # rows read
         sampleRate_(sampleRate),
         columns_(NULL),
         isPopulated_(FALSE)
       {
         // NOTE(review): presumably this is what assigns columns_; the
         // definition is not in this header -- confirm.
         setUpColumns();
       }

     virtual ~HSInMemoryTable()
       {}

     // Returns the column list pointer (may be NULL as far as this header
     // shows).
     HSColGroupStruct* getColumns() const { return columns_; }

     // Records the new row count and passes it, with the column list, to
     // HSGlobalsClass::getMemoryRequirements().
     // NOTE(review): presumably that call recomputes per-column memory needs
     // for the new row count -- confirm against its definition.
     void setNumRows(Int64 x) {
        rows_ = x;
        HSGlobalsClass::getMemoryRequirements(columns_, rows_);
     }

     // Returns the currently recorded row count.
     Int64 getNumRows() const { return rows_; }

     // method for algorithm 2
     // Each generate* function receives the statement text buffer as
     // queryText (presumably an output parameter -- confirm in the .cpp).
     void generateSelectList(NAString& queryText);

     void generateInsertSelectDQuery(NAString& targetTbl, NAString& smplTable,
                                     NAString& queryText);

     void generateInsertSelectIQuery(NAString& targetTbl, NAString& sourceTable,
                                     NAString& queryText,
                                     NABoolean hasOversizedColumns, HSTableDef * objDef,
                                     Int64 currentSampleSize, Int64 futureSampleSize,
                                     Int64 sourceSetSize);

     void generateSelectDQuery(NAString& smplTable, NAString& queryText);
     void generateSelectIQuery(NAString& smplTable, NAString& queryText);


     // method for algorithm 1
     void generateDeleteQuery(NAString& smplTable, NAString& queryText, NABoolean rollback);
     void generateInsertQuery(NAString& smplTable, NAString& sourceTable,
                              NAString& queryText, NABoolean rollback);

     // Loads data using the given query text; per the constructor comment,
     // rows_ is replaced with the actual number of rows read.
     Lng32 populate(NAString& queryText);

     // The data is actually deallocated by calling freeISMemory() from
     // HSGlobalsClass::incrementHistograms() for each column as soon as the
     // column is successfully handled by IUS (the data is preserved for use
     // by RUS/IS if IUS can't be performed). This function just resets the
     // flag that would cause assertion failure when populate() is called, as
     // it must be to load data for the next batch of IUS columns.
     void depopulate() {
       isPopulated_ = FALSE;
     }

     // Logging helper; title is presumably a caption for the logged state
     // (definition not shown here).
     void logState(const char* title);

   private:
     // Copy construction/assignment not defined: declared private and left
     // without a definition so that instances cannot be copied.
     HSInMemoryTable(const HSInMemoryTable&);
     HSInMemoryTable& operator=(const HSInMemoryTable&);

     // Called once from the constructor.
     void setUpColumns();


     NAString tableName_;          // table name passed to the constructor
     NAString whereCondition_;     // condition passed to the constructor
     Int64 rows_;                  // max rows initially; see setNumRows()/populate()
     double sampleRate_;           // sample rate passed to the constructor
     HSColGroupStruct* columns_;   // column list; NULL until set up
     NABoolean isPopulated_;       // cleared by depopulate()
 };  // class HSInMemoryTable

 #endif /* HSGLOBALS_H */