core/sql/optimizer/Stats.h - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 #ifndef STATS_HDR
 #define STATS_HDR

 /* -*-C++-*-
  ******************************************************************************
  *
  * File:         Stats.h
  * Description:  This file includes definitions for statistics related
  *               information used by the NOAH optimizer.
  *
  * Created:      March 16, 1994
  * Language:     C++
  *
  *
  *
  *
  ******************************************************************************
  */


 // -----------------------------------------------------------------------
 //  Include Files
 // -----------------------------------------------------------------------
 #include <math.h>
 #include "BaseTypes.h"
 #include "CostScalar.h"
 #include "Collections.h"
 #include "NAType.h"
 #include "ValueDesc.h"
 #include "EncodedValue.h"
 #include "SharedPtrCollections.h"

 // -----------------------------------------------------------------------
 // macro to get the square of a number
 // -----------------------------------------------------------------------
 #ifndef SQUARE
 #define SQUARE(x) ((x)*(x))
 #endif

 // -----------------------------------------------------------------------
 //  The following classes are defined in this file.
 // -----------------------------------------------------------------------
 class HistInt;                    // Histogram interval
 class Interval ;                  // an intelligent interface to HistInts
 class Histogram;                  // Histogram: ordered collection of histogram intervals
 class ColStats ;                  // Column Statistics
 class StatsList;                  // List of column statistics
 class FrequentValue;     // hash value, encoded value and frequency of skewed values
 class FrequentValueList;
 class ColumnId;
 class ColumnSet;
 class MultiColumnHistogram;
 class MultiColumnHistogramList;

 typedef IntrusiveSharedPtr<ColStats> ColStatsSharedPtr;
 typedef IntrusiveSharedPtr<Histogram> HistogramSharedPtr;

 //enumerated types used in histogram intervals reduction

 //Source identifies the location from where the reduction
 //of the number of histogram intervals is invoked.
 //AFTER_FETCH implies after statistics have be fetched
 //using FetchHistogram
 //INTERMEDIATE implies after a new histogram has been
 //generated through an intermediate relational operation
 //like a join
 enum Source {AFTER_FETCH, INTERMEDIATE};

 //Criterion identifies the criterion to use while merging
 //two intervals for the purpose of reducing the number of
 //intervals in a histogram
 //NONE implies that no two intervals should not be merged
 //CRITERION1 implies that two intervals should be merged
 //using criterion 1 as defined in the histogram intervals
 //reduction design document
 //CRITERION2 implies that two intervals should be merged
 //using criterion 2 as defined in the histogram intervals
 //reduction design document
 enum Criterion {NONE=0, CRITERION1=1, CRITERION2=2};

 // ----------------------------------------------------------------------
 // An indication of how pairs of ColStats are to be combined/merged.
 // The following summarizes the impact on each ColStats' interval:
 // INNER_JOIN_MERGE:  'Typical' join
 //    numUec  = MINOF(leftUEC, rightUEC)
 //    numRows = (leftRowCount * rightRowCount) / MAXOF(leftUEC, rightUEC)
 // SEMI_JOIN_MERGE:
 //	numUec  = MINOF( leftUEC, rightUEC )
 //    numRows = leftRowCount * (numUec / leftUEC)
 // ANTI_SEMI_JOIN_MERGE:
 //    numUec  = MAXOF (0, leftUEC-rightUEC)
 //    numRows = leftRowCount * (numUec / leftUEC)
 // OUTER_JOIN_MERGE:
 //    similar to INNER_JOIN_MERGE, except it retains two copies of the
 //    INNER_JOIN_MERGE's result.
 //    [one to be LEFT_JOIN_OR_MERGED, the other to be null-augmented.]
 // LEFT_JOIN_OR_MERGE:
 //    merges the OUTER_JOIN_MERGE's result with the original histogram of
 //    the outer-join column.
 //    numUec  = rightUEC
 //    numRows = leftRowCount + ((rightRowCount/numUec) * (numUec-leftUEC))
 // UNION_MERGE:
 //	numUec  = MAXOF( leftUEC, rightUEC )
 //    numRows = leftRowCount + rightRowCount
 // OR_MERGE:
 //    numUec  = MAXOF( leftUEC, rightUEC )
 //    numRows = MAXOF( leftRowCount, rightRowCount )
 // AND_MERGE:
 //    numUec  = MINOF( leftUEC, rigthUEC )
 //    numRows = MINOF( leftRowcount, rightRowcount )
 // ----------------------------------------------------------------------
 enum MergeType { INNER_JOIN_MERGE, SEMI_JOIN_MERGE, ANTI_SEMI_JOIN_MERGE,
                  OUTER_JOIN_MERGE, LEFT_JOIN_OR_MERGE,
                  UNION_MERGE, OR_MERGE, AND_MERGE };

 // ColumnSet is a collection of ColumnIds
 class ColumnSet : public ClusteredBitmap
 {
  public:
   // constructor
   ColumnSet(NAMemory *heap) : ClusteredBitmap(heap) {}

   // construct a memory efficient representation of colArray
   ColumnSet(const NAColumnArray& colArray, NAMemory *heap);

   // copy constructor
   ColumnSet(const ColumnSet& other, NAMemory *heap)
     : ClusteredBitmap(other,heap) {}

   ~ColumnSet() {}

   // Iterator methods for a ColumnSet
   // use the iterators in a for loop like this (assuming you have a
   // ColumnSet S over which you want to iterate)
   // for (CollIndex x=0;  S.next(x); S.advance(x) )
   //    { /* x is the current element */ }
   CollIndex init() const  { return CollIndex(0); }
   NABoolean next(CollIndex &x) const { return nextUsed(x); }
   void advance(CollIndex & x) const  { x++; }

   void display() const;

   void print() const;
   void printColsFromTable( FILE* ofd, NATable* table) const;

  private:
   DISALLOW_COPY_AND_ASSIGN(ColumnSet);
   ColumnSet(); // should not be called, should get link error.
 };

 // -----------------------------------------------------------------------
 //  MC Skewed Value List - contains skewed values for a column group.
 //  Information is stored in two lists which need to be maintained in
 //  sync: List of distinct values and corresponding frequency
 // -----------------------------------------------------------------------
 class MCSkewedValue : public NABasicObject
 {
 public:

   MCSkewedValue(NAMemory * h = 0) : boundary_((NAWchar *)(L"")), frequency_(-1), heap_(h) {}

   MCSkewedValue(NAWchar * boundary, CostScalar frequency, EncodedValue * eV, UInt32 hash, NAMemory * h = 0) :
     boundary_(boundary) ,
     frequency_(frequency),
     mcEncodedValue_(eV),
     hash_(hash),
     heap_(h)
     {
     };

   ~MCSkewedValue() {};

   MCSkewedValue(const MCSkewedValue& x) :
      boundary_(x.boundary_), frequency_(x.frequency_), mcEncodedValue_(x.mcEncodedValue_), hash_(x.hash_) {};

   MCSkewedValue & operator=(const MCSkewedValue& other);

   inline NABoolean operator==(const MCSkewedValue& other) const
   { return (*mcEncodedValue_ == *(other.mcEncodedValue_)); }
   inline NABoolean operator!=(const MCSkewedValue& other) const
   { return (*mcEncodedValue_ != *(other.mcEncodedValue_)); }
   inline NABoolean operator<(const MCSkewedValue& other) const
   { return (*mcEncodedValue_ < *(other.mcEncodedValue_)); }
   inline NABoolean operator<=(const MCSkewedValue& other) const
   { return (*mcEncodedValue_ <= *(other.mcEncodedValue_)); }
   inline NABoolean operator>(const MCSkewedValue& other) const
   { return (*mcEncodedValue_ > *(other.mcEncodedValue_)); }
   inline NABoolean operator>=(const MCSkewedValue& other) const
   { return (*mcEncodedValue_ >= *(other.mcEncodedValue_)); }

   const NAWchar * getBoundary() { return boundary_; }
   CostScalar getFrequency() { return frequency_; }
   const EncodedValue * getEncodedValue () { return mcEncodedValue_ ; }
   const UInt32 getHash () { return hash_; }

   void display() const;
   void print (FILE *f = stdout,
 	      const char * prefix = DEFAULT_INDENT,
 	      const char * suffix = "",
               CollHeap *c=NULL, char *buf=NULL) const;

 private:
   NAWchar * boundary_ ;
   CostScalar frequency_ ;
   EncodedValue * mcEncodedValue_;
   UInt32 hash_;
   NAMemory * heap_;
 };

 class MCSkewedValueList : public NAList<MCSkewedValue *>
 {
 public:
   // constructor
   MCSkewedValueList(NAMemory *h=0)
     : NAList<MCSkewedValue *>(h),heap_(h) {};

   MCSkewedValueList(const MCSkewedValueList & mcsvl, NAMemory *h=0);

   ~MCSkewedValueList() {};

   void addMCSkewedValue(MCSkewedValue *newValue);
   void addMCSkewedValue(const NAWchar * boundary, CostScalar frequency, const EncodedValue & eV, UInt32 hash);

   MCSkewedValueList & operator=(const MCSkewedValueList& other);
   NABoolean operator==(const MCSkewedValueList& other);

   void mergeMCSkewedValueList(MCSkewedValueList * leftSide,
                               MCSkewedValueList * rightSide,
                               CostScalar avgRowcountForNonSkewValuesOnLeftSide,
                               CostScalar avgRowcountForNonSkewValuesOnRightSide,
                               MergeType mergeMethod = INNER_JOIN_MERGE);

   void display() const;
   void print (FILE *f = stdout,
 	      const char * prefix = DEFAULT_INDENT,
 	      const char * suffix = "",
               CollHeap *c=NULL, char *buf=NULL) const;

 private:
   NAMemory * heap_;
 };

 // MultiColumnHistogram is the memory-efficient contextheap
 // representation of a table's multi-column histogram
 class MultiColumnHistogram : public NABasicObject
 {
  public:
   MultiColumnHistogram
     (ColumnSet&  columns,
      CostScalar& uec,
      CostScalar& rows,
      ComUID&     id,
      MCSkewedValueList *mcSkewedValueList,
      ColStatsSharedPtr colStatsPtr,
      NAMemory*   heap
      )
     : columns_(columns, heap), uec_(uec), rows_(rows)
     , histogramID_(id), mcSkewedValueList_(mcSkewedValueList)
     , colStatsPtr_(colStatsPtr), heap_(heap) {}

   ~MultiColumnHistogram() {columns_.clear();}

   CostScalar uec()  const { return uec_; }
   CostScalar rows() const { return rows_; }
   ComUID     id()   const { return histogramID_; }

   const ColumnSet& cols() const { return columns_; }
   const MCSkewedValueList *getMCSkewedValueList() const { return mcSkewedValueList_; }

   ColStatsSharedPtr getColStatsPtr()   const { return colStatsPtr_; }

   void display() const;
   void print( FILE* ofd=stdout, NATable* table=NULL) const;

  private:
   DISALLOW_COPY_AND_ASSIGN(MultiColumnHistogram);

   ColumnSet  columns_;
   CostScalar uec_;
   CostScalar rows_;
   ComUID     histogramID_;
   MCSkewedValueList *mcSkewedValueList_;
   ColStatsSharedPtr colStatsPtr_;
   NAMemory  *heap_;
 };

 // MultiColumnHistogramList is the memory-efficient contextheap
 // representation of a table's multi-column histograms only
 class MultiColumnHistogramList : public NAList<MultiColumnHistogram*>
 {
  public:
   // constructor
   MultiColumnHistogramList(NAMemory * heap)
     : NAList<MultiColumnHistogram*>(heap), heap_(heap) {}

   // destructor
   ~MultiColumnHistogramList();

   // add this multi-colum histogram to this list
   // (avoid adding any duplicate multi-column histograms)
   void addMultiColumnHistogram(const ColStats & mcStat,
                                ColumnSet* singleColPositions=NULL);

   // add these multi-colum histograms to this list
   // (avoid adding any duplicate multi-column histograms)
   void addMultiColumnHistograms(const StatsList & colStats);

   void display() const;
   void print( FILE* ofd=stdout, NATable* table=NULL) const;

  private:
   DISALLOW_COPY_AND_ASSIGN(MultiColumnHistogramList);

   NAMemory  *heap_;
 };

 class FrequentValue : public NABasicObject
 {
 public:

   FrequentValue(UInt32 hashValue, CostScalar frequency,
                 CostScalar probability, EncodedValue value);

   FrequentValue() : hash_(0), frequency_(-1), probability_(0), encodedValue_(UNINIT_ENCODEDVALUE) {}

   FrequentValue(const FrequentValue& x) :
      hash_(x.hash_), frequency_(x.frequency_), probability_(x.probability_), encodedValue_(x.encodedValue_) {}

   FrequentValue(const EncodedValue& normValue, ConstValue* itemPtr, const NAType*,
                 CostScalar frequency = csOne, CostScalar probability = csOne
                );

   ~FrequentValue() {};

   inline UInt32 getHash() const { return hash_; }
   inline CostScalar getFrequency() const { return frequency_; }
   inline CostScalar getProbability() const { return probability_; }
   inline EncodedValue getEncodedValue () const { return encodedValue_ ; }

   void setFrequency(CostScalar freq) { frequency_ = freq; }
   void setProbability(CostScalar prob = csOne) { probability_ = prob; }

   NABoolean operator ==(const FrequentValue& x) const
    { return (encodedValue_.isNullValue() && x.encodedValue_.isNullValue() ) ||
             (encodedValue_ == x.encodedValue_ && hash_ == x.hash_); }

   NABoolean operator >(const FrequentValue& x) const
    { return (encodedValue_.isNullValue() && !x.encodedValue_.isNullValue()) ||
             (encodedValue_ > x.encodedValue_ ||
              (encodedValue_ == x.encodedValue_ && hash_ > x.hash_)); }

   NABoolean operator <(const FrequentValue& x) const
    { return (!encodedValue_.isNullValue() && x.encodedValue_.isNullValue()) ||
             (encodedValue_ < x.encodedValue_ ||
              (encodedValue_ == x.encodedValue_ && hash_ < x.hash_)); }

   void print (FILE *f, const char * prefix, const char * suffix,
               CollHeap *c=NULL, char *buf=NULL) const;

 private:
   UInt32 hash_;
   CostScalar frequency_;
   CostScalar probability_;
   EncodedValue encodedValue_;
 };


 class FrequentValueList : public LIST(FrequentValue)
 {
 public:
   FrequentValueList (NAMemory* h,CollIndex initLen =0)
     : LIST(FrequentValue) (h,initLen)
   { };

   FrequentValueList (const FrequentValueList & fvlist, NAMemory* h) :
     LIST(FrequentValue)(fvlist, h)
     {}

   ~FrequentValueList() {};

   NABoolean isFull();

   void insertFrequentValue(const FrequentValue & freqValue);

   void scaleFreqAndProbOfFrequentValues(CostScalar freqScale,
                                         CostScalar probScale);

   void deleteFrequentValuesAboveOrEqual(const EncodedValue & val, NABoolean include = FALSE);

   void deleteFrequentValuesBelowOrEqual(const EncodedValue & val, NABoolean include = FALSE);

   void deleteAllButThisFreqVal(const FrequentValue& key);

   void deleteFrequentValue (const FrequentValue& key);

   void removeNULLAsFrequentValue();

   CostScalar getTotalFrequency() const;
   CostScalar getTotalProbability() const;
   CostScalar getMaxFrequency() const;
   FrequentValue getMostFreqValue(EncodedValue value) const;
   FrequentValue getMostFreqValue() const;

   void mergeFreqFreqValues(FrequentValueList &leftFrequentValueList,
                            FrequentValueList &rightFrequentValueList,
                            CostScalar scaleFactor,
                            MergeType mergeMethod,
                            FrequentValueList *tmpLeftFreqValueList,
                            FrequentValueList *tmpRightFreqValueList);

   void scaleAndAppend(FrequentValueList & rightFrequentValueList,
                       CostScalar adjFreq,
                       CostScalar adjProb,
                       CostScalar scaleFactor);

   NABoolean getfrequentValueIndex(const FrequentValue&, CollIndex & index) const;

   CostScalar freqOfGivenEncodedVal(EncodedValue mfvEV,
                                    EncodedValue loBoundary,
                                    EncodedValue hiBoundary,
                                    CostScalar &mfvCnt) const;
   void display() const;

   void print (FILE *f = stdout,
 	      const char * prefix = DEFAULT_INDENT,
 	      const char * suffix = "",
               CollHeap *c=NULL, char *buf=NULL) const;
 };

 // a list to encapsulate an MC list of encoded values
 class MCboundaryValueList : public LIST(EncodedValue)
 {
 public:
    MCboundaryValueList (NAMemory* h=STMTHEAP) : heap_ (h), LIST(EncodedValue) (h) {};

    MCboundaryValueList (const MCboundaryValueList& other, NAMemory* h=STMTHEAP) : heap_ (h), LIST(EncodedValue) (h)
    {
       for (Int32 i=0; i < other.entries(); i++)
       {
          this->insert(other[i]);
       }
    }

    MCboundaryValueList (const NormValueList* nvl, NAMemory* h=STMTHEAP) : heap_ (h), LIST(EncodedValue) (h)
    {
       for (Int32 i=0; i < nvl->entries(); i++)
       {
         EncodedValue ev;
         ev.setValue(nvl->at(i));
         this->insert(ev);
       }
    };

    COMPARE_RESULT compare (const MCboundaryValueList &other) const
    {
       DCMPASSERT (this->entries() == other.entries());
       for (Int32  i = 0; i < this->entries(); i++)
       {
           if ((*this)[i].compare(other[i]) == SAME)
             continue;
           return ((*this)[i].compare(other[i]));
       }
       return (SAME);
    }

    NABoolean operator< (const MCboundaryValueList &other) const
    {
       return (this->compare(other) == LESS);
    }
    NABoolean operator<= (const MCboundaryValueList &other) const
    {
       COMPARE_RESULT  result = this->compare(other);
       return ((result == SAME) || (result == LESS));
    }
    NABoolean operator> (const MCboundaryValueList &other) const
    {
       return (this->compare(other) == MORE);
    }
    NABoolean operator>= (const MCboundaryValueList &other) const
    {
       COMPARE_RESULT  result = this->compare(other);
       return ((result == SAME) || (result == MORE));
    }

    void getValueList (NormValueList& list)
    {
       for (Int32  i = 0; i < this->entries(); i++)
       {
           list.insertAt(i, ((*this)[i].getValue()));
       }
    }

    NAString* convertToString (const NAColumnArray& colArray, NABoolean forLastInterval);

    // for each MC histogram we have two boundary values b_low and b_high. Assuming we have r regions we would like
    // to distributed the data to.
    //
    // b_low   = (l1, ....., ln) where n is the number of columns in the MC
    // b_high  = (h1, ......, hn)
    //
    // then the ranges that will be created are as follows
    //
    // - for range 1 the begin key will be b_low
    // - for all other ranges k from 2 to r-1, the begin key is (vk1,...,vkn) where vki is computed as follow:
    //     vki = v(k-1)i + (hi-li)/n
    //
    void getMinMax (const MCboundaryValueList& lv, const MCboundaryValueList& hv, Int32 numParts, LIST(MCboundaryValueList) &vals);

    // ---------------------------------------------------------------------
    //   // Print
    // ---------------------------------------------------------------------
    void display() const ;

    void print( FILE* ofd = stdout,
                const char* indent = DEFAULT_INDENT,
                const char* title = "MCboundaryValueList") const;

    ~MCboundaryValueList () {};
 private:
   NAMemory * heap_;
 };


 // -----------------------------------------------------------------------
 //  HistInt: "HISTogram INTerval"
 // -----------------------------------------------------------------------

 class HistInt
 {
 friend class Interval ;

 protected:
   // copy method
   void copy (const HistInt& other);

 public:

   // -----------------------------------------------------------------------
   //  Constructors
   // -----------------------------------------------------------------------
   HistInt () : rows_(0), uec_(0), boundary_(UNINIT_ENCODEDVALUE), boundInc_(FALSE), hash_(0), rows2mfv_(0),
                MCBoundary_(STMTHEAP)
   {}


   HistInt (const EncodedValue & value, NABoolean boundIncluded = FALSE, UInt32 hash = 0)
        : rows_(0), uec_(0), boundary_(value), boundInc_(boundIncluded), hash_(hash), rows2mfv_(0),
          MCBoundary_(STMTHEAP)
   {
      setupMCBoundary ();
   }

   HistInt(Int32 intNum, const NAWchar *intBoundary, const NAColumnArray &columns,
 	  CostScalar card, CostScalar uec, NABoolean boundInc = TRUE, CostScalar card2mfv = 0) ;

   // copy constructor
   HistInt (const HistInt & other) : MCBoundary_(STMTHEAP)
   { copy(other); }

 public:

   // assignment operator
   HistInt & operator = (const HistInt& other)
   {
     if (this != &other)
       copy(other);
     return *this;
   }

   void setupMCBoundary ();

   // comparison operator
   NABoolean operator == (const HistInt& other) const
   {
   if(this->boundary_.compare(other.boundary_) == SAME
       && this->boundInc_ == other.boundInc_
       && this->MCBoundary_.compare(other.MCBoundary_) == SAME
       && (this->rows_) == other.rows_
       && (this->uec_) == other.uec_
      )
       return TRUE;
   else return FALSE;
       }

   // -----------------------------------------------------------------------
   //  Destructor
   // -----------------------------------------------------------------------
   virtual ~HistInt() {}

   // ---------------------------------------------------------------------
   //  Accessor Functions
   // ---------------------------------------------------------------------
   inline const EncodedValue & getBoundary () const { return boundary_ ; }
   inline const MCboundaryValueList & getMCBoundary () const { return MCBoundary_ ; }
   inline NABoolean isBoundIncl ()            const { return boundInc_ ; }
   inline CostScalar getCardinality ()        const { return rows_ ; }
   inline CostScalar getUec()                 const { return uec_; }
   inline UInt32 getHash()                    const { return hash_; }
   inline CostScalar getCardinality2mfv()      const { return rows2mfv_ ; }

   inline CostScalar getFudgedUec ()          const
   { return ( uec_ == csZero ? uec_ :  MAXOF(uec_, csOne) ) ; }
   inline NABoolean isNull ()                 const
   { return boundary_.isNullValue() ; }

   // ---------------------------------------------------------------------
   //  Manipulation Methods
   // ---------------------------------------------------------------------
   inline void setBoundary (const EncodedValue & intBound)
   { boundary_ = intBound ; }
   inline void setBoundIncl(NABoolean boundIncl = TRUE)
   { boundInc_ = boundIncl ; }

   // ---------------------------------------------------------------------
   // HistInt::mergeInterval, merges the left and right intervals based
   // on the mergeMethod. This is a helper method for ColStats::mergeColStats
   // ----------------------------------------------------------------------
   CostScalar mergeInterval(const HistInt & left,
                           const HistInt & right,
                           CostScalar scaleRowCount,
                           MergeType mergeMethod = INNER_JOIN_MERGE
 );

   // the following is used to maintain the semantic : uec <= rows
   void setCardAndUec (CostScalar card, CostScalar uec) ;
   void setCardinality2mfv(CostScalar card) ;

   // -----------------------------------------------------------------------
   //  Utility routines
   // -----------------------------------------------------------------------
   void display (FILE *f = stdout,
 		const char * prefix = DEFAULT_INDENT,
 		const char * suffix = "",
                 CollHeap *c=NULL, char *buf=NULL) const;

 private:
   // these first two methods are private because we never want them used
   // individually ==> use the ::setCardAndUec() routine instead

   // NB: the values for these should always be >= 0 !
   // --> and if the value is extremely low (e.g., 1e-16), round it to zero
   void setCardinality (CostScalar card);
   void setUec (CostScalar uec);

   EncodedValue boundary_;      // histint boundary (upper bound)
   MCboundaryValueList MCBoundary_;
   NABoolean boundInc_;         // TRUE ==> boundary is inclusive
   CostScalar rows_;            // histint cardinality
   CostScalar uec_;             // histint UEC
   UInt32 hash_;                // histint hash for some SQL data type T
                                // such that any v of T we have
                                // EncodedValues(v) != v. CHAR is one such
                                // SQL data type.
   CostScalar rows2mfv_;         // rowcount for the 2nd most frequent value
                                // in the interval
 };  // HistInt


 // -----------------------------------------------------------------------
 //  Histogram Class - a collection of intervals
 // -----------------------------------------------------------------------
 class Histogram : public LIST (HistInt)
 {
   friend class Interval ;

 public:

   Histogram(NAMemory* h) : LIST(HistInt)(h)
     {}

   Histogram (const Histogram & hist, NAMemory* h) :
     LIST(HistInt)(hist, h)
     {}

 private:
   // we prevent people from creating Histograms that aren't explicitly put
   // on a heap! code doing this will not link!
   Histogram () ;
   Histogram (const Histogram & hist) ;

 public:

   // ---------------------------------------------------------------------
   //  Histogram Manipulation routines
   // ---------------------------------------------------------------------
   // Given two histograms, create a Template histogram to use in subsequent
   //  merge operations involving those two histograms.

   // newest, simplest version of cMT -- it works and is much easier to
   // understand
   HistogramSharedPtr createMergeTemplate (const HistogramSharedPtr& otherHistogram,
                                           NABoolean equiMerge) const;

   // ----------------------------------------------------------------------
   // utility routine used by ColStatDescList::divideHistogramAtPartitionBoundaries()
   //
   // given a two histograms, merges all intervals in THIS that do not occur in
   // partitionBoundaries
   //
   // if there are any intervals in THIS that are outside the range of
   // partitionBoundaries, then returns FALSE
   //
   // returns TRUE if successful without errors, FALSE otherwise
   NABoolean condenseToPartitionBoundaries (const HistogramSharedPtr& partitionBoundaries) ;

 public:


   // -----------------------------------------------------------------------
   // simplifying, oft-used utility routines
   // (2 versions, because sometimes this is a useful shortcut from
   // within other const member functions)
   // -----------------------------------------------------------------------
   inline HistInt& firstHistInt()             { return (*this)[0] ; }
   inline const HistInt& firstHistInt() const { return (*this)[0] ; }

   inline HistInt& lastHistInt()              { return (*this)[this->entries()-1] ; }
   inline const HistInt& lastHistInt() const  { return (*this)[this->entries()-1] ; }

   // -----------------------------------------------------------------------
   // Interval : a simplification of HistInts
   // --> needed because of all the hassles/headaches caused by
   //     "single-value intervals"
   // -----------------------------------------------------------------------

   // returns the # of intervals in the Histogram
   // this number will be somewhere in between 0 and entries()
   CollIndex numIntervals() const
   {
     DCMPASSERT( entries() != 1 ) ; // generic Histogram sanity check

     if( entries() == 1)
     {
       // log the message to the event log
       // SQLMXLoggingArea::logSQLMXAssertionFailureEvent(__FILE__, __LINE__, "Histogram has just one HistInt");

       Histogram* tempHist = (Histogram *)this;
       tempHist->clear();
     }

     if ( entries() == 0 )
       return 0 ;

     return (entries()-1) ;
   }

   // -----------------------------------------------------------------------
   // NULL-handling Histogram methods
   // -----------------------------------------------------------------------

   // is there a NULL interval in this Histogram?
    NABoolean isNullInstantiated() const ;
   // remove that NULL interval if it exists
    void removeNullInterval() ;
   // insert a NULL interval if it doesn't exist
    void insertNullInterval () ;

   // NB: Intervals are indexed from 1..numIntervals()

   // inserts, as necessary, an SVI into the list of HistInts
   // with the appropriate value ;
   // assigns this SVI the appropriate row/uec values, subtracting from
   // the neighboring Intervals as necessary
   CollIndex insertSingleValuedInterval (const EncodedValue & value,
                                         NABoolean distributeRowsAndUec = TRUE);

   CostScalar mergeSVIWithNextAndSetMaxFreq();

   //
   // Does the work of splitting the Histogram at a certain boundary
   // value.
   //
   // This may or may not require inserting a HistInt (if value is not
   // equal to a current HistInt boundary value, we will need to insert
   // a new HistInt -- and even if value IS equal to a current HistInt
   // value, we often need to insert another HistInt, depending on
   // whether we're splitting the Histogram for a <=,>=,< or > operation.
   // (We may need to create an S.V.I. in order to split the Histogram
   // in the right place.)

   // inserts a new zero-row/uec interval into the Histogram
   // --> NB, can only do this if this Interval is above the
   //     range of current Intervals, below them, or none currently exist
   //
   // The 'isNewBoundIncluded' flag determines the boundary inclusiveness
   // of the new Interval.
   void insertZeroInterval (const EncodedValue & loValue,
                            const EncodedValue & hiValue,
                            NABoolean isNewBoundIncluded) ;
   void insertZeroInterval (const CostScalar& loValue,
                            const CostScalar& hiValue,
                            NABoolean isNewBoundIncluded) ;

   void insertZeroInterval (const NormValueList& loValue,
                            const NormValueList& hiValue,
                            NABoolean isNewBoundIncluded) ;


   // 2 steps to condense a histogram into a single interval:
   // 1. takes all of the current intervals and adds up the rows / uecs
   // 2. creates a single interval with the same max/min/row/uec values
   void condenseToSingleInterval();

   // Histogram intervals reduction methods
   // Method to reduce the number of intervals
   void reduceNumHistInts(Criterion reductionCriterion,
                                  Source invokedFrom = AFTER_FETCH);

   // compute the extended boundaries of an interval when compared to its neighbors. The method does not
   // have any side affect on the interval or its neighbors . This is used by the HQC logic
   void computeExtendedIntRange (Interval& currentInt, Criterion& reductionCriterion,
                                 EncodedValue& hiBound, EncodedValue& loBound,
                                 NABoolean& hiBoundInclusive, NABoolean& loBoundInclusive);


   // ----------------------------------------------------------------------------
   // Method to reduce the number of histogram intervals based on query predicates
   // example predicates
   // * t1.col1 = 3
   // * t1.col1 < 3
   // * t1.col1 > 1
   // * t1.col1 > 1 and t1.col1 < 3
   // * t1.col1 = t2.col1 // i.e. a join predicate
   // ----------------------------------------------------------------------------
   void compressHistogramForQueryPreds(ItemExpr * lowerBound,
                                       ItemExpr * upperBound,
                                       NABoolean hasJoinPred = FALSE);

   // to calculate the selectivity for an equality predicate
   NABoolean computeSelectivityForEquality(ItemExpr * constVal, // input
                                           CostScalar totalRowcount,  // input
                                           CostScalar totalUEC, // input
                                           CostScalar& selectivity // output
                                           );

   // iterator methods
   inline Interval getFirstInterval() const ;
   inline Interval getLastInterval() const ;
   inline Interval getLastNonNullInterval () const ;
   Interval getNextInterval(const Interval & current) const ;
   Interval getPrevInterval(const Interval & current) const ;

   // returns the interval #'d by index
   inline Interval getInterval (CollIndex index) const ;

   // -----------------------------------------------------------------------
   //  Utility routines
   // -----------------------------------------------------------------------
   void display() const;

   void print (FILE *f = stdout,
 	      const char * prefix = DEFAULT_INDENT,
 	      const char * suffix = "",
               CollHeap *c=NULL, char *buf=NULL) const;

   // This is part of the IntrusiveSharedPtr mechanism.
   INTRUSIVE_SHARED_PTR(Histogram);
 private:

   // removed the heap ptr because we don't need it
   //  NAMemory* heap_;
 }; // Histogram


 // -----------------------------------------------------------------------
 //  Interval - an attempt to abstract away the nastiness
 //             of "single-valued intervals"
 //  purpose: mainly used to enumerate through the Histogram
 //  easi/
 // Here is what Intervals look like :
 //
 // HistInts:
 //
 //#    0    1    2    3    4    5
 //
 //row  0    2    0    3    1    2
 //uec  0    3    0    1    2    3
 //
 //val  1    2    4    4    5    7
 //     |    |    |_3__|    |    |
 //     |_2__|    |    |    |_2__|
 //     |    |    |    |_1__|    |
 //     |    |_0__|    |    |    |
 //
 //       I1   I2   I3   I4   I5
 //
 //row    2    0    3    1    2
 //uec    2    0    1    1    2
 //hi     2    4    4    5    7
 //lo     1    2    4    4    5
 //
 // I1..I5 are the Intervals corresponding to
 // the underlying HistInts
 // --> I assert it's easier to work with Intervals
 //     than HistInts, since they're what we're actually
 //     concerned with -- the intervals between HistInt
 //     boundaries (the "bars" in a histogram), not the
 //     HistInts themselves
 //
 // So, Interval N lies between (*hist_)[N] and (*hist_)[N+1]
 //
 //  RESTRICTION RESTRICTION RESTRICTION RESTRICTION RESTRICTION RESTRICTION
 //
 //  As a performance improvement this class now keeps a local copy
 //  of (*hist_)[N+1] as HistInt hiInt. Need to make sure you have the
 //  right copy or else there will be assertion failure in debug version.
 //  Procedure refreshHiInt() will get the local copy in sync. But use
 //  this call miserly as it is expensive.
 //
 // downfalls to avoid:
 //
 // 1.  Interval iter = histogram->getFirstInterval();
 //     Interval iter1 = iter;
 //     iter.setRowCount(45);
 //
 //      In the above case although the iter and histogram got modified, iter1
 //      did not. Developer should set the rowcount fisrt then do the assignment
 //      or call refreshHiInt() for iter1.
 //
 // 2.   Interval iter = histogram->getFirstInterval();
 //      iter.next();
 //      histogram.removeat(iter.getLoIndex() +1);
 //
 //      Now iter's hiInt is bad because it was removed by histogram so
 //      developer needs to call refreshHiInt()
 // -----------------------------------------------------------------------

 class Interval : public NABasicObject
 {
 public:
   //
   // ctors
   // NB: no need for a dtor
   //
   Interval () :
       loIndex_(NULL_COLL_INDEX), hist_(0)
   {}

   Interval (CollIndex startIndex, const HistogramSharedPtr& hist) :
        loIndex_(startIndex), hist_(hist) ,
        hiInt_((*hist)[startIndex+1])

   {}

   // copy ctor
   Interval (const Interval & other) :
    loIndex_(other.loIndex_), hist_(other.hist_) ,
    hiInt_((*hist_)[loIndex_+1])

   {}

   Interval & operator = (const Interval & other)
   {
     loIndex_ = other.loIndex_ ;
     hist_    = other.hist_ ;
     if(hist_)
     {
       hiInt_ = ((*hist_)[loIndex_+1]);
     }
     return *this ;
   }

   //
   // simple inline methods
   //

   // needed for Collections classes, for some reason
   inline NABoolean operator == (const Interval & other)
   { return ( loIndex_ == other.loIndex_ && hist_ == other.hist_ ) ; }

   // am I the first/last Interval in the histogram?
   inline NABoolean isFirst() const ;
   inline NABoolean isLast() const ;

   // gets/sets the uec & rowcount values
   CostScalar getUec() const
   {
   OK() ;
   DCMPASSERT(hiInt_ == ((*hist_)[loIndex_+1]));
   return hiInt_.getUec() ;
   }

   CostScalar getRowcount() const
   {
   OK() ;
   DCMPASSERT(hiInt_ == ((*hist_)[loIndex_+1]));
   return hiInt_.getCardinality() ;
   }

   UInt32 getHash() const
   {
   OK() ;
   DCMPASSERT(hiInt_ == ((*hist_)[loIndex_+1]));
   return hiInt_.getHash() ;
   }

   // get the rowcount of the 2nd most frequent value
   CostScalar getRowcount2mfv() const
   {
   OK() ;
   DCMPASSERT(hiInt_ == ((*hist_)[loIndex_+1]));
   return hiInt_.getCardinality2mfv() ;
   }

   // search list for the MFV that is contained in this Interval. Also
   // return the MFV's frequency. Return FALSE if this interval does not
   // associate with any skew values.
   NABoolean
   getMFV(const FrequentValueList&, EncodedValue& mfv, CostScalar& freq);

   NABoolean
   getMFV(const MCSkewedValueList&,  MCboundaryValueList& mfv, CostScalar& freq);


   void getRCSmallerThanMFV(const EncodedValue& mfv,
                                  const CostScalar& freqMFV,
                                  CostScalar& rc);

   void getRCSmallerThanMFV(const MCboundaryValueList& mfv,
                                  const CostScalar& freqMFV,
                                  CostScalar& rc);


   void makeSplits(
                           HistogramSharedPtr& newHist,
                           const NAType* nt,
                           const CostScalar newHeight,
                           CostScalar& newRC,        // On entry: the # to fill;
                                                     // On exit: #rc unfilled
                           CostScalar& availableRC,  // On extry: rc available;
                                                     // On exit: the # of rows remaining in the interval
                           CostScalar& lowB,         // On entry: the low bound to use to insert the new
                                                     // first interval.
                                                     // On exit: the current last low bound to use
                                                     // to insert a new interval.
                           const CostScalar& lowBInt,// the low and high bound in which availableRC
                           const CostScalar& hiBInt, // #rows resides. The two bounds are used to
                                                     // compute the new high bound(s) for new intervals
                           NABoolean allowSplits);


   void makeSplitsForMC(   HistogramSharedPtr& newHist,
                           const CostScalar newHeight,
                           CostScalar& newRC,        // On entry: the # to fill;
                                                     // On exit: #rc unfilled
                           CostScalar& availableRC,  // On extry: rc available;
                                                     // On exit: the # of rows remaining in the interval
                           NormValueList* lowB,      // On entry: the low bound to use to insert the new
                                                     // first interval.
                                                     // On exit: the current last low bound to use
                                                     // to insert a new interval.
                           NormValueList*& lowBInt,  // the low and high bound in which availableRC
                           NormValueList*& hiBInt,   // #rows resides. The two bounds are used to
                                                     // compute the new high bound(s) for new intervals
                           NABoolean allowSplits);

 private:
   // these two are private because we never want them used individually
   // ==> use the following routine instead
   inline void setUec (const CostScalar & value) ;
   inline void setRowcount (const CostScalar & value) ;

 public:
   inline void setRowsAndUec (const CostScalar & rows,
                              const CostScalar & uec) ;

   inline void refreshHiInt();
   // gets/sets the lo/hi boundary values
   inline const EncodedValue& loBound() const ;

   inline const MCboundaryValueList& loMCBound() const;

   const MCboundaryValueList& hiMCBound() const
   {
      DCMPASSERT(hiInt_ == ((*hist_)[loIndex_+1]));
      return hiInt_.getMCBoundary() ;
   }

   const EncodedValue& hiBound() const
   {
     DCMPASSERT(hiInt_ == ((*hist_)[loIndex_+1]));
     return hiInt_.getBoundary() ;
   }

   inline void setLoBound (const EncodedValue & newLo) ;
   inline void setHiBound (const EncodedValue & newHi) ;

   // gets/sets the bounds incl values
   inline NABoolean isLoBoundInclusive() const ;
   NABoolean isHiBoundInclusive() const
   {
     DCMPASSERT(hiInt_ == ((*hist_)[loIndex_+1]));
     return ( hiInt_.isBoundIncl() ) ;
   }
   inline void setLoBoundInclusive (NABoolean value) ;
   inline void setHiBoundInclusive (NABoolean value) ;

   inline NABoolean isSingleValued() const { return (loBound() == hiBound()) ; }
   inline CollIndex getLoIndex() const { OK() ; return loIndex_ ; }

   inline NABoolean isNull() const { return (hiBound().isNullValue()) ; }

   // make sure we don't use Intervals that have been compromised
   inline void setInvalid() { hist_ = 0; }
   inline NABoolean isValid() const
   {
     return ( (hist_ != 0) &&
              ((loIndex_+2) <= hist_->entries()) ) ;
   }
   // was loIndex_ <= hist_->entries()-2, but we hate underflow!

   //
   // non-trivial Interval functions
   //

   // sets THIS equal to the interval that comes after/before THIS
   void next () ;
   void prev () ;

   // Does this interval contain a frequent value?
   NABoolean containsAFrequentValue(const CostScalar & thresholdFreq) const;

   // merge self with 'other'
   NABoolean merge (Interval & other) ;

   // is the UEC or Rowcount too low?
   // that is, greater than 0 but less than 1
   // --> returns TRUE when this is true
   NABoolean canBeMerged() const ;

   // figures out which intervals, starting with 'startInterval', are
   // spanned by THIS (NB: startInterval is most likely in another
   // histogram)
   NABoolean spans (const Interval & startInterval) const ;

   // figures out whether THIS contains 'value'
   NABoolean containsValue (const EncodedValue & value) const ;

   // removes 'value' from THIS
   // --> will probably require creating additional Intervals,
   //     in order to create the single-valued 0-uec/0-rowcount
   //     Interval
   void removeValue (const EncodedValue & value) ;

   // -----------------------------------------------------------------------
   // the following function originally was a global function because it's
   // useful in places where you don't want to call it on an Interval
   // object; however, to avoid polluting the global namespace, and to make
   // it easier to find for anyone else who wants to use it, it's now an
   // Interval method.  However, note that it does not use any state
   // information from the calling object.
   //
   // distributes uecs/rows contained between loBound & hiBound to the
   // Intervals in 'spanList'
   // -----------------------------------------------------------------------
   static void distributeRowsAndUec (LIST(Interval) & spanList,
                                     CostScalar rows,
                                     CostScalar uecs,
                                     const EncodedValue & loBound,
                                     const EncodedValue & hiBound) ;

   // -----------------------------------------------------------------------
   // methods used for histogram intervals reduction
   // -----------------------------------------------------------------------

   // compare this interval with the adjacent interval (which should be passed in via
   // other) and return TRUE if they are equal based on the Criterion passed in
   NABoolean compare(Source invokedFrom, Criterion reductionCriterion, Interval & other);

   // compare this interval with the adjacent interval (which should be passed in via
   // other) and return TRUE if they are equal based on criterion 1.
   // The definition of criterion 1 can be found in the histogram intervals reduction
   // design document
   NABoolean satisfiesCriterion1(Source invokedFrom, Interval & other);

   // compare this interval with the adjacent interval (which should be passed in via
   // other) and return TRUE if they are equal based on criterion 2.
   // The definition of criterion 2 can be found in the histogram intervals reduction
   // design document
   NABoolean satisfiesCriterion2(Source invokedFrom, Interval & other);

   // -----------------------------------------------------------------------
   //  Utility routines
   // -----------------------------------------------------------------------
   void display (FILE *f = stdout,
 		const char * prefix = DEFAULT_INDENT,
 		const char * suffix = "") const;

 private:

 #ifndef NDEBUG
   // internal consistency check -- for debugging only
   NABoolean OK() const;
 #else
   inline NABoolean OK() const {return TRUE;};
 #endif

   CollIndex loIndex_ ;         // which HistInt do we start with?
   HistogramSharedPtr hist_ ;
   HistInt hiInt_;            // the histogram we're interfacing with
 } ;

 // get the next/prev Interval

 //
 // For bounds inclusive information, keep in mind the
 // semantics of the HistInt isBoundIncl_ flag.  It's
 // usually true, except for the zeroth HistInt (which
 // means that the first interval's lower bound is
 // bounds inclusive), or the lower interval in a S.V.I.,
 // which means that the S.V.I. is completely bounds
 // inclusive.
 //
 // 0    1    2    3    4
 //
 // 1    2    3    3    4
 // <    <=   <    <=   <=
 // |    |    |    |    |
 // |    |    |    |    |
 // |    |    |    |    |
 //   I1   I2   I3   I4
 //
 // So, given the above HistInts :
 //
 //   I1 spans 1 and 2, including the boundaries 1 & 2
 //   I2 spans 2 and 3, not including either boundary
 //   I3 spans 3 and 3, including that boundary
 //   I4 spans 3 and 4, including 4 but not 3
 //

 inline NABoolean
 Interval::isLoBoundInclusive() const
 {
   return ( NOT ((*hist_)[loIndex_]).isBoundIncl() ) ;
 }


 // set the inclusive boundary values

 inline void
 Interval::setLoBoundInclusive (NABoolean value)
 {
   ((*hist_)[loIndex_]).setBoundIncl (NOT value);
 }

 inline void
 Interval::setHiBoundInclusive (NABoolean value)
 {
   HistInt & temp = ((*hist_)[loIndex_+1]);
   temp.setBoundIncl (value) ;
   hiInt_ = temp;
 }

 // return lo/hi bound information

 inline const EncodedValue&
 Interval::loBound() const
 {
   return ((*hist_)[loIndex_]).getBoundary() ;
 }

 inline const MCboundaryValueList&
 Interval::loMCBound() const
 {
   return ((*hist_)[loIndex_]).getMCBoundary() ;
 }


 // set lo/hi boundaries

 inline void
 Interval::setLoBound (const EncodedValue& newLo)
 {
  ((*hist_)[loIndex_]).setBoundary (newLo) ;
 }

 inline void
 Interval::setHiBound (const EncodedValue& newHi)
 {
   HistInt & temp = ((*hist_)[loIndex_+1]);
   temp.setBoundary (newHi);
   hiInt_ = temp;
 }

 // set uec/rowcount information

 inline void
 Interval::setUec (const CostScalar & value)
 {
   HistInt & temp = ((*hist_)[loIndex_+1]);
   temp.setUec (value) ;
   hiInt_ = temp;
 }

 inline void
 Interval::setRowcount (const CostScalar & value)
 {
   HistInt & temp = ((*hist_)[loIndex_+1]);
   temp.setCardinality (value) ;
   hiInt_ = temp;
 }

 inline void
 Interval::setRowsAndUec (const CostScalar & rows,
                          const CostScalar & uec)
 {
   HistInt & temp = ((*hist_)[loIndex_+1]);
   temp.setCardAndUec (rows,uec) ;
   hiInt_ = temp;
 }

 // is this the first/last interval?

 inline NABoolean
 Interval::isFirst() const
 { OK() ; return ( loIndex_ == 0 ) ; }

 inline NABoolean
 Interval::isLast() const
 { OK() ; return ( loIndex_+2 == hist_->entries() ) ; }

 // refresh HiInt
 inline void
 Interval::refreshHiInt()
 { hiInt_ = ((*hist_)[loIndex_+1]); }
 //
 // histogram member functions that interface directly with
 // the Interval member functions
 //


 inline Interval
 Histogram::getFirstInterval() const
 {
   if ( numIntervals() == 0 )
     return Interval() ; // 0 intervals ==> no first exists

   HistogramSharedPtr histPtr = HistogramSharedPtr::getIntrusiveSharedPtr(this);
   return Interval(0,histPtr);
 }

 inline Interval
 Histogram::getLastInterval() const
 {
   if ( numIntervals() == 0 )
     return Interval() ; // 0 intervals ==> no last exists

   HistogramSharedPtr histPtr = HistogramSharedPtr::getIntrusiveSharedPtr(this);
   return Interval(entries()-2,histPtr);
 }

 inline Interval
 Histogram::getLastNonNullInterval() const
 {
   if ( isNullInstantiated() )
     {
       if ( entries() == 2 )
         return Interval() ; // there is only that single, NULL interval

       HistogramSharedPtr histPtr = HistogramSharedPtr::getIntrusiveSharedPtr(this);
       return Interval (entries()-4,histPtr);
     }
   else
     {
       return getLastInterval() ;
     }
 }

 inline Interval
 Histogram::getInterval (CollIndex index) const
 {
   if ( ! (1 <= index && index <= numIntervals()) ) // make sure this function
     return Interval() ;                            // is used properly

   HistogramSharedPtr histPtr = HistogramSharedPtr::getIntrusiveSharedPtr(this);
   return Interval(index-1,histPtr);
 }

 // this special histogramid is checked in ColStats::ColStats
 // constructor to determine whether a histogram is fake.
 const Int32 FAKEHISTOGRAMID=10000;

 // -----------------------------------------------------------------------
 //  Column Statistics
 //
 //  The following class represents statistics for an individual
 //  column or set of columns.  The column(s) are identified by an
 //  NAColumnArray.  This class contains a reference to histogram statistics,
 //  as well as aggregated column statistics.  For some column(s), it
 //  may be possible to have only the aggregated column(s) statistics and
 //  no histogram statistics.
 // -----------------------------------------------------------------------
 class ColStats : public NABasicObject
 {
   static THREAD_P Int64 fakeHistogramIDCounter_;
 public:
   // special fake histogramids are above this value, but,
   // histogramIDs generated by update statistics
   // will be less than this
   static const Int32 USTAT_HISTOGRAM_ID_THRESHOLD=0x7FFFFFFF;
   static ComUID nextFakeHistogramID();
   static NABoolean isUSTATGeneratedHistID(ComUID id);

   // -----------------------------------------------------------------------
   //  Constructors
   // -----------------------------------------------------------------------
   ColStats (ComUID & histid, CostScalar uec = 0, CostScalar rowcount = 0,
             CostScalar baseRowCount = -1,
 	    NABoolean unique = FALSE, NABoolean shapeChanged = FALSE,
 	    const HistogramSharedPtr& desc = 0, NABoolean modified = FALSE,
 	    CostScalar rowRedFactor = 1.0, CostScalar uecRedFactor = 1.0,
                  Int32 avgVarcharSize = 0,
 	    NAMemory* heap=0, NABoolean allowMinusOne=FALSE);

   // copy constructor
   ColStats (const ColStats &other, NAMemory* h, NABoolean assignColArray=TRUE);

   // sometimes we want an uninitialized ColStats object
   ColStats (const HistogramSharedPtr& hist, NAMemory* h) :
        histogram_(hist), columns_(h), heap_(h), histogramID_(0),
        minValue_(UNINIT_ENCODEDVALUE),
        maxValue_(UNINIT_ENCODEDVALUE),
     frequentValues_(h), colPositions_(h), mcSkewedValueList_(h)
   {
     rowcount_     = totalUec_     = baseUec_ = sumOfMaxUec_ = uecBeforePred_ = 0 ;
     baseRowCount_ = -1;
     rowRedFactor_ = uecRedFactor_ = 1 ;
     maxIntervalCount_ = 0 ;
 	maxFreq_ = -1.0;

     avgVarcharSize_ = 0;
     afterFetchIntReductionAttempted_ = FALSE;

     //NB: flags' values *must* be set by set* functions
     setUnique        (FALSE) ;
     setAlmostUnique  (FALSE);
     setModified      (FALSE) ;
     setShapeChanged  (FALSE) ;
     setFakeHistogram (TRUE)  ;
     setOrigFakeHist  (FALSE) ;
     setSmallSampleHistogram (FALSE);
     setMinSetByPred  (FALSE) ;
     setMaxSetByPred  (FALSE) ;
     setRecentJoin    (FALSE) ;
     setUpStatsNeeded (FALSE) ;
     setVirtualColForHist   (FALSE) ;
     setSmallSampleHistogram(FALSE) ;
     setIsCompressed  (FALSE);
     setIsARollingColumn (FALSE);
     setIsColWithBndryConflict (FALSE);
     setSelectivitySetUsingHint (FALSE);
   }
 // deepCopy()
 // Creates a new ColStats and makes a deepCopy of it using other
 static ColStatsSharedPtr deepCopy(const ColStats& other, NAMemory * heap,
                                   NABoolean useColumnPositions=FALSE,
                                   NABoolean copyIntervals=TRUE);

 // copy histogram into cache.
 // use "lean" representation of columns.
 static ColStatsSharedPtr deepCopyHistIntoCache
   (const ColStats& other, NAMemory * heap)
   { return deepCopy(other, heap, TRUE); }

 // creates a deep copy of single-column histogram from cache.
 // sets deep copy's column to col.
 static ColStatsSharedPtr deepCopySingleColHistFromCache
   (const ColStats& other, NAColumn& col, NAMemory * heap,
    NABoolean copyIntervals);

 private:
   // we prevent people from creating ColStats that aren't explicitly put
   // on a heap! code doing this will not link!
   ColStats() ;
   ColStats (const ColStats &other) ;
 public:

   // -----------------------------------------------------------------------
   //  Destructor
   // -----------------------------------------------------------------------
   virtual ~ColStats();
   virtual void deepDelete();
   void deepDeleteFromHistogramCache();
   // ---------------------------------------------------------------------
   //  Accessor functions
   // ---------------------------------------------------------------------
   inline const  NAColumnArray & getStatColumns () const { return columns_     ; }
   inline HistogramSharedPtr getHistogram       () const { return histogram_   ; }
   inline const ComUID& getHistogramId          () const {return histogramID_;}
   inline const NABoolean isSingleIntHist()
     { return getHistogram() ? (getHistogram()->entries()<=2) : TRUE; }
   inline const  EncodedValue & getMinValue     () const { return minValue_    ; }
   inline const  EncodedValue & getMaxValue     () const { return maxValue_    ; }

   inline CollIndex getMaxIntervalCount         () const { return maxIntervalCount_ ; }

   const FrequentValueList &getFrequentValues() const { return frequentValues_; }
   FrequentValueList &getModifableFrequentValues() { return frequentValues_; }
   void setFrequentValue(const FrequentValueList &newFrequentValue) {frequentValues_ = newFrequentValue;}

   const MCSkewedValueList &getMCSkewedValueList() const { return mcSkewedValueList_; }
   void addMCSkewedValue(const NAWchar * boundary, CostScalar frequency);
   void setMCSkewedValueList(const MCSkewedValueList &mcSkewedValueList) {mcSkewedValueList_ = mcSkewedValueList;}

   // since we're careful about only setting >= 0 values for these numbers,
   // we don't have to do a check here
   inline CostScalar getRowcount     () const { return rowcount_         ; }
   inline CostScalar getTotalUec     () const { return totalUec_         ; }
   inline CostScalar getSumOfMaxUec  () const { return sumOfMaxUec_      ; }
   inline CostScalar getRedFactor    () const { return rowRedFactor_     ; }
   inline CostScalar getUecRedFactor () const { return uecRedFactor_     ; }
   inline CostScalar getBaseUec      () const { return baseUec_          ; }
   inline CostScalar getBaseRowCount () const { return baseRowCount_     ; }
   inline CostScalar getUecBeforePreds () const { return uecBeforePred_  ; }

   // report the status of various flags
   inline NABoolean isUnique	      () const { return (_flags_ & _unique_) != 0	     ; }
   inline NABoolean isModified	      () const { return (_flags_ & _modified_) != 0	     ; }
   inline NABoolean isShapeChanged     () const { return (_flags_ & _shapeChanged_) != 0	     ; }
   inline NABoolean isFakeHistogram    () const { return (_flags_ & _isFakeHistogram_) != 0   ; }
   inline NABoolean isObsoleteHistogram() const { return (_flags_ & _isObsoleteHistogram_)!= 0; }
   inline NABoolean isOrigFakeHist     () const { return (_flags_ & _isOrigFakeHist_) != 0    ; }
   inline NABoolean isMinSetByPred     () const { return (_flags_ & _minSetByPred_) != 0	     ; }
   inline NABoolean isMaxSetByPred     () const { return (_flags_ & _maxSetByPred_) != 0	     ; }
   inline NABoolean isRecentJoin	      () const { return (_flags_ & _recentJoin_) != 0	     ; }
   inline NABoolean isUpStatsNeeded    () const { return (_flags_ & _updateStatsNeeded_) != 0 ; }
   inline NABoolean isVirtualColForHist() const { return (_flags_ & _virtualColForHist_) != 0 ; }
   inline NABoolean isAlmostUnique     () const { return (_flags_ & _almostUnique_) != 0	     ; }
   inline NABoolean isSmallSampleHistogram() const { return (_flags_ & _isSmallSampleHistogram_) != 0; }
   inline NABoolean isCompressed       () const { return (_flags_ & _isCompressed_) != 0      ; }
   inline NABoolean isARollingColumn   () const { return (_flags_ & _isARollingColumn_) != 0      ; }
   inline NABoolean isColWithBndryConflict () const { return (_flags_ & _isColWithBndryConflict_) != 0 ; }
   inline NABoolean isSelectivitySetUsingHint () const { return (_flags_ & _selectivitySetUsingHint_) != 0 ; }
   inline NABoolean isMCforHbasePartitioning () const { return (_flags_ & _mcForHbasePartitioning_) != 0 ; }


   Int32 getAvgVarcharSize() const { return avgVarcharSize_; };
   // ---------------------------------------------------------------------
   //  Mutator functions
   // ---------------------------------------------------------------------

   // return a pointer to the object so you can modify it
   inline NAColumnArray & statColumns() { return columns_; }
   HistogramSharedPtr getHistogramToModify();
   ColumnSet& getStatColumnPositions() { return colPositions_; }

   // populate NAColumnArray with this ColumnSet
   void populateColumnArray(const ColumnSet& cols, const NATable* table);

   // populate my NAColumnSet from my ColumnArray
   void populateColumnSetFromColumnArray();

   void createAndAddSkewedValue(const wchar_t *boundary, Interval &iter);
   void createAndAddFrequentValue(const wchar_t *boundary, Interval &iter);

   NABoolean mergeFrequentValues(ColStatsSharedPtr& otherStats,
                                 NABoolean scaleFreq = TRUE,
                                 MergeType mergeMethod = INNER_JOIN_MERGE,
                                 NABoolean adjRowCount = FALSE);

   NABoolean getTotalFreqInfoForIntervalWithValue(EncodedValue mfvEV,
                                                  CostScalar & totalMfvRc,
                                                  CostScalar &mfvCnt) ;

   void createFakeHist();

   void compressToSingleInt();

   // setMinValue
   inline void setMinValue (const NAWchar *theValue)
                          { minValue_ = EncodedValue (theValue, getStatColumns()) ; }
   inline void setMinValue (const EncodedValue & minValue) { minValue_ = minValue ; }
   // setMaxValue
   inline void setMaxValue (const NAWchar *theValue)
                          { maxValue_ = EncodedValue (theValue, getStatColumns()) ; }
   inline void setMaxValue (const EncodedValue & maxValue) { maxValue_ = maxValue ; }

   CostScalar getMaxFreq() const;

   // This method returns the total row count and total UEC of intervals
   // whose frequency is greater than or equal to the threshold value
   void getAccRowCountAboveOrEqThreshold (CostScalar & accRowCnt, /* out */
 										 CostScalar & accUec,   /* out */
 										 CostScalar thresVal = 1.0); /* in optional */

   CostScalar getScaleFactor() const {return scaleFactor_; }

   void computeMaxFreqOfCol(NABoolean forced = FALSE);

   void setMaxFreq(CostScalar freq);

   void setScaleFactor(CostScalar scale)
   { scaleFactor_ = scale; }

   CostScalar getAdjContinuumUEC() const {return adjContinuumUEC_; } ;
   CostScalar getAdjContinuumFreq() const {return adjContinuumFreq_; };

   void setAdjContinuumUEC(CostScalar adjContinuumUEC) { adjContinuumUEC_ = adjContinuumUEC.minCsZero(); }
   void setAdjContinuumFreq(CostScalar adjContinuumFreq) { adjContinuumFreq_ = adjContinuumFreq.minCsZero(); }

   // setHistogram
   inline void setHistogram (const HistogramSharedPtr& dist)   { histogram_ = dist ; }

   // insert zero interval with boundaries equal to min and max and rows and uec from the colstats
   void insertZeroInterval();

   // setMaxIntervalCount() -- this value cannot be less than 2
   // NB: The only place where this fn should be called is in NATable::getStatistics()
   inline void setMaxIntervalCount(CollIndex number)
                                          { maxIntervalCount_ = MAXOF (number, 2) ; }

   // set the rowcount/totalUec values by examining the underlying Histogram
   void setRowsAndUecFromHistogram() ;
   // set the min/min values by examining the underlying Histogram
   void setMaxMinValuesFromHistogram() ;

   // set the columns_ of the Histogram. These are presently being used by the
   // GenericUpdate, when the histograms are created for the top columns of the
   // Update.

   void setStatColumn (NAColumn * column);

   // setting the impt aggregate values -- all of which should always be >= 0 !
   // (and if they're really close to zero, e.g., 1e-16, round them to zero)
 private:
   // these two are private because we never want them used individually
   // ==> use the ::setRowsAndUec() routine instead
   void setRowcount (CostScalar row);

   // We have added the allowMinusOne flag to allow uecs to be
   // initialized to minusOne. This is only used for UDFs, and the minusOne
   // is used to indicate that we do not have valid UEC information for a
   // particular output. The UDF costing code will look for the minusOne flag
   // as an indicator that it needs to compute an UEC from the Functions inputs
   // as a fallback.
   void setTotalUec (CostScalar uec, NABoolean allowMinusOne = FALSE);

 public:
   // the following is used to maintain the semantic of uec <= rows
   // See comment above as to the use of the allowMinusOne flag.
   void setRowsAndUec (CostScalar rows, CostScalar uec, NABoolean allowMinusOne=FALSE) ;

   void setBaseRowCount (CostScalar row);

   void setBaseUec(CostScalar uec);

   // the following is used to store the sum-of-max-uec-per-interval value in
   // mergeColStats, for later perusal/resetting in estimateCardinality
   void setSumOfMaxUec (CostScalar value);

   // the following is used to store the base UEC for that column before applying
   // predicate to it.
   void setUecBeforePred (CostScalar value)
                                   { uecBeforePred_ = value ; }

   // we have to be extremely careful about rounding the reduction factors
   // because they can legitimately become very close to zero but not equal
   // to zero (e.g., join between 2 1-billion row tables returns 1 row ==>
   // redfactor == 1e-18)
   void setRedFactor (CostScalar rowred);

   void setUecRedFactor (CostScalar uecred);

   // setting the flags
   inline void setUnique		   (NABoolean flag)	   { flag ? _flags_ |= _unique_		     : _flags_ &= ~_unique_;	         }
   inline void setModified	   (NABoolean flag = TRUE) { flag ? _flags_ |= _modified_	     : _flags_ &= ~_modified_;	         }
   inline void setShapeChanged	   (NABoolean flag = TRUE) { flag ? _flags_ |= _shapeChanged_	     : _flags_ &= ~_shapeChanged_;       }
   inline void setFakeHistogram	   (NABoolean flag = TRUE) { flag ? _flags_ |= _isFakeHistogram_     : _flags_ &= ~_isFakeHistogram_;    }
   inline void setObsoleteHistogram (NABoolean flag = TRUE) { flag ? _flags_ |= _isObsoleteHistogram_ : _flags_ &= ~_isObsoleteHistogram_;}
   inline void setOrigFakeHist	   (NABoolean flag = TRUE) { flag ? _flags_ |= _isOrigFakeHist_	     : _flags_ &= ~_isOrigFakeHist_;     }
   inline void setMinSetByPred	   (NABoolean flag)        { flag ? _flags_ |= _minSetByPred_	     : _flags_ &= ~_minSetByPred_;       }
   inline void setMaxSetByPred	   (NABoolean flag)        { flag ? _flags_ |= _maxSetByPred_	     : _flags_ &= ~_maxSetByPred_;       }
   inline void setRecentJoin	   (NABoolean flag = TRUE) { flag ? _flags_ |= _recentJoin_	     : _flags_ &= ~_recentJoin_;	 }
   inline void setUpStatsNeeded	   (NABoolean flag)        { flag ? _flags_ |= _updateStatsNeeded_   : _flags_ &= ~_updateStatsNeeded_;  }
   inline void setVirtualColForHist (NABoolean flag = TRUE) { flag ? _flags_ |= _virtualColForHist_   : _flags_ &= ~_virtualColForHist_;  }
   inline void setAlmostUnique	   (NABoolean flag)        { flag ? _flags_ |= _almostUnique_        : _flags_ &= ~_almostUnique_;       }
   inline void setSmallSampleHistogram (NABoolean flag = TRUE) { flag ? _flags_ |= _isSmallSampleHistogram_ : _flags_ &= ~_isSmallSampleHistogram_; }
   inline void setIsCompressed      (NABoolean flag = TRUE) { flag ? _flags_ |= _isCompressed_        : _flags_ &= ~_isCompressed_;       }
   inline void setIsARollingColumn  (NABoolean flag = TRUE) { flag ? _flags_ |= _isARollingColumn_    : _flags_ &= ~_isARollingColumn_;       }
   inline void setIsColWithBndryConflict (NABoolean flag = TRUE) { flag ? _flags_ |= _isColWithBndryConflict_ : _flags_ &= ~_isColWithBndryConflict_; }
   inline void setSelectivitySetUsingHint (NABoolean flag = TRUE) { flag ? _flags_ |= _selectivitySetUsingHint_ : _flags_ &= ~_selectivitySetUsingHint_; }
   inline void setMCforHbasePartitioning (NABoolean flag = TRUE) { flag ? _flags_ |= _mcForHbasePartitioning_ : _flags_ &= ~_mcForHbasePartitioning_; }

   // a minor variation on a THIS = OTHER assignment operator
   void overwrite (const ColStats &other) ;

   // ---------------------------------------------------------------------
   //  Comparison of two column statistics :
   //  Column statistics are equivalent if they are on the same set of cols
   // ---------------------------------------------------------------------
   inline NABoolean operator== (const ColStats & other)
   { return (this == &other) ; }

   // ---------------------------------------------------------------------
   // NULL handling routines
   // --> trying to make the handling of NULLs a little easier ...
   // ---------------------------------------------------------------------

   // is there a NULL interval in the Histogram?
   NABoolean isNullInstantiated () const ;

   // removing that NULL interval, if it exists
   void removeNullInterval () ;

   // inserting a NULL interval, if it doesn't already exist
   void insertNullInterval () ;

   // reporting the number of NULLs / NULL-uecs in that interval
   CostScalar getNullCount () const ;
   CostScalar getNullUec ()   const ;

   // setting the number of NULLs and NULL-uecs in that interval
   void setNullRowsAndUec (CostScalar nulls, CostScalar nullUec) ;

   // ---------------------------------------------------------------------
   //  Histogram Manipulation routines
   // ---------------------------------------------------------------------
   // Following operations such as joins a histogram may have a series of
   // intervals containing zero rows.  In that situation, compress out the
   // redundant empty histogram intervals.
   void removeRedundantEmpties() ;

   void modifyStats (ItemExpr * pred, CostScalar *maxSelectivity=NULL);

   void simplestPreds (ItemExpr * pred) ;

   // Synthesize the effect of column <(=) upBound
   void newUpperBound (const EncodedValue & upBound, ConstValue* constExpr, NABoolean boundIncl);


   // Synthesize the effect of column >(=) lowBound
   void newLowerBound (const EncodedValue & lowBound, ConstValue* constExpr, NABoolean boundIncl);
   // Synthesize the effect of an equality predicate against a constant that
   // covers all columns of the histogram:
   // i.e. reduce the histogram to a single, single-valued, interval.
   void setToSingleValue (const EncodedValue & newValue, ConstValue* constExpr,
                          CostScalar *totalRows=NULL, FrequentValue* fv=NULL);

   // a helper function called by ::setToSingleValue() and ::isNull()
   //
   // does the work of removing all HistInts and adding 2 so that
   // the resulting ColStats has the corresponding min/max values
   void setToSingleInterval (const EncodedValue & newLoBound,
                             const EncodedValue & newUpBound,
                             CostScalar numRows,
                             CostScalar numUecs) ;

   // Synthesize the effect of column NOT= newValue
   void removeSingleValue (const EncodedValue & newValue, ConstValue* consExpr);

   // THIS contains a histogram template (created by createMergeTemplate());
   // adjust this template to have uec/row contents equivalent to
   // those in 'other.'
   void populateTemplate(const ColStatsSharedPtr& other) ;

   // NB: this used to be a Histogram member function named
   // 'adjustBoundaries', but this didn't describe what the function does

   // Synthesize the effect of
   //         IS [NOT] NULL and IS [NOT] UNKNOWN
   void isNull (NABoolean notFlag);

   // Do the work of clearing the Histogram and nullifying the
   // aggregate information
   void clearHistogram() ;

   // Finds where in the list of HistInts to place the new HistInt.  Then,
   // divides the rows/uecs from the divided Interval into the two new
   // Intervals (or, if this HistInt boundary already exists, jumps to next
   // step).
   //
   // Finally, removes the intervals above or below the indentified interval
   // boundary.  That is, for < operations, we remove all Intervals above
   // this one; for > ops, we destory all Histints below it.
   void divideHistogramAlongBoundaryValue(const EncodedValue & value,
                                          OperatorTypeEnum splitOperator) ;

   // simple helper functions that prune the Histogram above/below
   // the given boundary Interval; also, sets the s-c flag if
   // anything's actually deleted
   //
   // NB: the second of these invalidates the parameter Interval
   void deleteIntervalsAbove (const Interval & boundary) ;
   void deleteIntervalsBelow (Interval & boundary) ;

   void populateTemplateOfFakeHist(const ColStatsSharedPtr& fakeHistogram,
                                   const ColStatsSharedPtr& realHistgoram);

   // Perform an inner- or semi-join equality predicate, or an 'OR' union
   //   operation on two ColStats, updating THIS with the result.
   void mergeColStats (const ColStatsSharedPtr& otherStats,
                       MergeType mergeMethod, NABoolean isNumeric,
                       OperatorTypeEnum exprOpCode,
                       NABoolean mergeFVs=TRUE) ;

   // ------------------------------------------------------------
   // minSetByPred_ , maxSetByPred_ flags indicate if the boundaries
   // for the histograms were set by application of predicates. The values
   // for these flags for the target merged histogram are calculated below
   // ------------------------------------------------------------
   void setMaxAndMinSetByPredFlags (const ColStatsSharedPtr& otherStatsCopy,
 				       NABoolean &maxSetByPred,
 				       NABoolean &minSetByPred);
   // ---------------------------------------------------------------------
   // graceful recovery in case of any error while merging two histograms,
   // ---------------------------------------------------------------------
   void recoverFromMergeColStats(const ColStatsSharedPtr& otherStats,
 			        NABoolean isNumeric,
 			        MergeType mergeMethod = INNER_JOIN_MERGE);

   // ---------------------------------------------------------------------
   // The join cardinality can be computed either by merging histogram
   // intervals or merging frequent value lists. In this method we compute
   // join cardinality using histogram intervals
   // ----------------------------------------------------------------------
   CostScalar mergeWithExpandedHistograms (const ColStatsSharedPtr& otherStats,
 			                                    NABoolean isNumeric,
 				                                  CostScalar & newRowcount,
 				                                  CostScalar & newUec,
 			                                    MergeType mergeMethod = INNER_JOIN_MERGE);


   // ---------------------------------------------------------------------
   // The join cardinality can be computed either by merging histogram
   // intervals or merging frequent value lists. In this method we compute
   // join cardinality using frequent values
   // ----------------------------------------------------------------------
   CostScalar mergeCompressedHistograms (const ColStatsSharedPtr& otherStats,
 				                                CostScalar & newRowcount,
 				                                CostScalar & newUec,
 			                                  MergeType mergeMethod = INNER_JOIN_MERGE);

   // --------------------------------------------------------------------
   // adjust selectivity computed by either merging histogram intervals
   // or frequent value lists to take into account any indirect reductions
   // ---------------------------------------------------------------------
   CostScalar adjustSelectivity(const ColStatsSharedPtr& otherStats,
                                const CostScalar & newUec,
 			                         MergeType mergeMethod = INNER_JOIN_MERGE);


   // ------------------------------------------------------------------------
   // populate left and right histogram templates created for merge. The
   // histograms will be populated based on if the stats exist for both
   // children or not
   // ------------------------------------------------------------------------
   NABoolean populateLeftAndRightTemplates(const ColStatsSharedPtr & otherStatsCopy,
 					  HistogramSharedPtr & leftHistogram,
 					  HistogramSharedPtr & rightHistogram,
 					  HistogramSharedPtr & targetHistogram);

   // -----------------------------------------------------------------------
   // Histogram intervals reduction routines
   // -----------------------------------------------------------------------

   // calculates the reduction criterion to apply for reduction of the
   // number of histogram intervals
   Criterion decideReductionCriterion(Source invokedFrom,
                                      Criterion reductionCriterion,
                                      const NAColumn * column,
                                      NABoolean ignoreHistogramCachingFlag=FALSE);

   // tracks whether a reduction of histograms after fetch histograms was attempted on this colStats
   NABoolean afterFetchIntReductionAttempted () {return afterFetchIntReductionAttempted_; }
   void setAfterFetchIntReductionAttempted () { afterFetchIntReductionAttempted_ = TRUE;}

   //reduce the number of histogram intervals in the histogram
   //referenced by this ColStats object. The reduction criterion
   //depends on parameters invokedFrom, and reductionCriterion
   void reduceNumHistInts(Source invokedFrom,
                          Criterion reductionCriterion);

   // -----------------------------------------------------------------------
   // This utility routine is used by costing to determine the number of
   // key predicate 'probes' performed during a Nested Join which did not
   // produce any result rows.
   // THIS provides the ColStats of the appropriate columns in the Input
   // EstLogProp;  otherStats provides the result of the key predicate join
   // done with the base table.   An INNER Join is assumed.
   // -----------------------------------------------------------------------
   CostScalar countFailedProbes(const ColStatsSharedPtr& otherStats) const;

   // -----------------------------------------------------------------------
   // in the given ColStats, replace the current histogram with a copy that
   // has had all of its interval's rowcounts multiplied by the specified
   // scale.
   // -----------------------------------------------------------------------
   void copyAndScaleHistogram(CostScalar scale) ;

   // -----------------------------------------------------------------------
   // Just replace the current histogram buckets with values which have the
   // reduction factors applied.  An additional scale can be applied.
   //
   // Also, if any intervals have rows<1, rows>0, we merge these with their
   // neighbors.
   // -----------------------------------------------------------------------
   void scaleHistogram (CostScalar scale, CostScalar uecScale = 1, NABoolean scaleFreqValueList = TRUE) ;

   // -----------------------------------------------------------------------
   // Increase the rowcount by adding some NULLs; specifically, adding
   //       targetRowCount - rowcount_ NULLs
   // -----------------------------------------------------------------------
   void nullAugmentHistogram(CostScalar targetRowCount);

   // -----------------------------------------------------------------------
   // Set row counts to match the UECs.
   // i.e., unique-ify the statistics on this column
   // -----------------------------------------------------------------------
   void makeGrouped();


   // compress the ColStats for local predicates on a column
   // The local predicates should involve a constant
   // e.g.
   // * t1.col1 = 3
   // * t1.col1 < 3
   // * t1.col1 > 1
   // * t1.col1 > 1 and t1.col1 < 3
   // * t1.col1 = t2.col1 // i.e. join predicate
   void compressColStatsForQueryPreds(ItemExpr * lowerBound,
                                      ItemExpr * upperBound,
                                      NABoolean  hasJoinPred=FALSE);

   // -----------------------------------------------------------------------
   // The user has set a value in the defaults table
   // <HIST_MAX_NUMBER_OF_INTERVALS> to specify the maximum number of
   // intervals that he wants.  This value can be used to test the
   // tradeoffs between compile-time and rowcount estimation accuracy --
   // because histograms with large numbers of intervals are believed to
   // slow down compilation speed significantly.
   //
   // This routine goes through the Histogram and merges intervals as
   // necessary to get to this upper bound (the data member maxIntervalCount_
   // stores the value from the defaults table).
   // -----------------------------------------------------------------------
   void reduceToMaxIntervalCount() ;

   //
   // This routine goes through the Histogram and split/merge intervals as
   // necessary to evenly distribute the # of rows into numOfIntervals
   // intervals.
   //
   // For column types that allow 1 to 1 mapping to the
   // encoded value (as double), the split point can be one of the following
   //
   //   1. one or more values before the most frequent value (MFV)
   //   2. the most frequent value (MFV)
   //   3. one or more values after the most frequent value (MFV)
   //
   // Otherwise, only two types of splitting are possible.
   //   1. the most frequent value (MFV)
   //   2. one or more values after the most frequent value (MFV)

   HistogramSharedPtr transformOnIntervals(Int32 numOfIntervals) ;
   HistogramSharedPtr transformOnIntervalsForMC(Int32 numOfIntervals) ;

   //Helper method to adjust Rowcount for rolling columns
   void adjustRowcountforRollingColumns(ConstValue * constant);

   // Adjust the max selectivity via the following steps:
   // 1. If 'value' is in the frequentvaluelist, update the max card with it;
   // 2. Otherwise, find the interval for 'value', update the max card with
   //    the MAX of the 2nd most frequent value, and the average rowcount
   //    in the interval.
   // The seach is using either the normValue or constExpr argument
   // depending on the data types of the columns on which the frequent
   // list is built. The search chooses an argument that will yield the
   // best resolution.
   void adjustMaxSelectivity(const EncodedValue& normValue,
                             ConstValue* constExpr,
                             CostScalar *totalRows,
                             CostScalar *maxSelectivity);

   // -----------------------------------------------------------------------
   //  Utility functions
   // -----------------------------------------------------------------------

   void display() const;

   void print (FILE *f = stdout,
 	      const char * prefix = DEFAULT_INDENT,
 	      const char * suffix = "",
               CollHeap *c=NULL, char *buf=NULL,
               NABoolean hideDetail=FALSE) const;

   void trace (FILE *f, NATable* table);

   INTRUSIVE_SHARED_PTR(ColStats);

 private:
   // -----------------------------------------------------------------------
   // When one, or both, of the two to-be-combined column statistics has no
   // histogram it is still possible to (sometimes) create a useful result
   // histogram.  This private utility routine attempts to deal with that
   // case.
   // -----------------------------------------------------------------------
   void mergeWithEmptyHistogram (const ColStatsSharedPtr& otherStats,
 				MergeType mergeMethod) ;

   // -----------------------------------------------------------------------
   // This is a helper method used by mergeColStats() to recover from merge
   // template that consists of zero intervals.
   // -----------------------------------------------------------------------
   NABoolean handleMergeTemplateWithZeroIntervals(const ColStatsSharedPtr& otherStats,
 						  HistogramSharedPtr& leftHistogram);

   // -----------------------------------------------------------------------
   // This is a helper method for determining if the merge is for a join
   // -----------------------------------------------------------------------
   NABoolean isAJoinRelatedMerge(MergeType mergeMethod) const;

   // -----------------------------------------------------------------------
   // This is a helper method for reducing intermediate histograms
   // -----------------------------------------------------------------------
   void reduceIntermediateHistInts(MergeType mergeMethod, NABoolean isNumeric);

 // ----------------------------------------------------------------------
 // attributes:
 // ----------------------------------------------------------------------
   NAColumnArray columns_;  // columns for which we have statistics

   ColumnSet colPositions_;

   // NB: most of the time this array contains only a SINGLE NAColumn *,
   // since the histogram manipulation code does not (and probably never
   // will, due to the complexity) support multicolumn histogram synthesis.
   //
   // ==> however, we can't turn this field into a NAColumn* because class
   // StatsList uses this class to store multicolumn statistics information
   // that it's read from the catalog tables; later on, it's converted into
   // multicolumn uec information; see the files:
   //
   // * NATable.cpp, NATable::getStatistics()
   // here the multicolumn uec information is filtered out, but not
   // before it's stored into two lists inside the StatsList object so
   // that it can be used later
   //
   // * TableDesc.cpp, TableDesc::getColStats()
   // here's where the two StatsList lists containing the multicolumn
   // uec information is converted into a MultiColumnUecList
   //
   // * ColStatDesc.cpp
   // this file contains the class MultiColumnUecList, which is a (read-only)
   // datamember of ColStatDesc

   // the underlying histogram
   ComUID histogramID_;           // ID of histogram
   HistogramSharedPtr histogram_; // the histogram itself

   //reduction factors used for efficiency
   CostScalar uecRedFactor_;  // uec reduction factor
   CostScalar rowRedFactor_;  // reduction factor: if <> 1, need to multiply
   //                         // each interval's rowcount by redFactor

   //Field for refining selectivity
   CostScalar baseUec_;       // base uec for object (initial uec or
   //                         // direct manipulation count

   CostScalar uecBeforePred_;  // uec before applying predicates. This would be different
 			      // than base UEC for columns on which the predicate is
 			      // being applied

   CostScalar baseRowCount_;  // base rowcount (initial row count)
   CostScalar sumOfMaxUec_; // during a join, we sum up the max uec per
   //                       // interval; used in the rowcount adjustment
   //                       // when we use multicolumn uec info

   // the four aggregate values
   CostScalar totalUec_ ;     // total unique entry count for object
   CostScalar rowcount_ ;     // total rowcount
   EncodedValue minValue_ ;   // lower bound (in encoded format)
   EncodedValue maxValue_ ;   // upper bound (in encoded format)
   CostScalar maxFreq_;		 // max frequency of any calue in this colStats
   CostScalar scaleFactor_;  // used to adjust the maximum frequency to take care
 							 // of the cartesian product factor

   MCSkewedValueList mcSkewedValueList_; // List of skewed values for MC histograms
   FrequentValueList frequentValues_; // List of skewed values
   CostScalar adjContinuumUEC_;
   CostScalar adjContinuumFreq_;

   //A bit map to represent all the boolean members used in ColStats
   UInt32 _flags_;

   // These following enum values represent the boolean switches used by ColStats class
   enum Flags
   {
     _unique_                     = 0x00000001,    // uniqueness constraint; not used much
     _modified_                   = 0x00000002,    // indicates that the referenced Histogram has not yet been modified
     _shapeChanged_               = 0x00000004,    // Has the identified histogram changed its shape?
     _isFakeHistogram_            = 0x00000008,    // Is histogram fake still/originally due to update stats?
     _isOrigFakeHist_             = 0x00000010,    // Is histogram originally fake due to update stats?
     _minSetByPred_               = 0x00000020,    // lower bound enforced due to predicate
     _maxSetByPred_               = 0x00000040,    // upper bound enforced due to predicate
     _recentJoin_                 = 0x00000080,    // is this Histogram the result of a (very) recent Join?
                                                   // (flag set in mergeColStats, checked/unset in estCard)
     _virtualColForHist_          = 0x00000100,    // Flag set if the histogram is created for a virtual column such as
                                                   // that for Transpose expression or Rowset
     _updateStatsNeeded_          = 0x00000200,    // Is this histogram's rowcount so large that it should have its
                                                   // statistics updated?  (This histogram may or may not be fake.)  This
                                                   // flag's value comes from the number of rows in the histogram and the
                                                   // defaults-table value HIST_ROWCOUNT_REQUIRING_STATS.
     _almostUnique_               = 0x00000400,    // This flag indicates uniqueness based on stats
     _isObsoleteHistogram_        = 0x00000800,    // Is histogram obsolete in comparison with other histograms for the same table?
     _isSmallSampleHistogram_     = 0x00001000,    // Is histogram from a small sample (compile time stats).
     _isCompressed_               = 0x00002000,    // FALSE means that the histogram has
     _isARollingColumn_           = 0x00004000,    // Flag to set if the histogram represents a rolling column.
     _isColWithBndryConflict_     = 0x00008000,    // Flag to indicate that there was a conflicting interval boundary in the
                                                   // histogram as a result the intervals were merged
     _selectivitySetUsingHint_    = 0x00010000,    // User-specified selectivity was set for this histogram
     _mcForHbasePartitioning_     = 0x00020000     // used by hbase to partition data
   };

   CollIndex maxIntervalCount_; // maximum # of intervals the user wants (this can
   //                           // be used to compare tradeoffs in compile-time &
   //                           // rowcount estimation accuracy).

   Int32 avgVarcharSize_;         // average number of chars in columns[0], if
                                // it is of VARCHAR type.

   NABoolean afterFetchIntReductionAttempted_; // was an attempted made after histogram fetch
                                               // to reduce the number of intervals for this colStats

   NAMemory* heap_;           // the NAMemory* for dynamic allocation.
 }; // ColStats

 // ----------------------------------------------------------------------
 // the inline NULL-handling ColStats methods
 // ----------------------------------------------------------------------

 inline NABoolean
 ColStats::isNullInstantiated() const
 { return histogram_->isNullInstantiated() ; }


 // inserting a NULL interval, if it doesn't already exist
 inline void
 ColStats::insertNullInterval()
 {
   if ( !isNullInstantiated() )
     {
       histogram_->insertNullInterval () ;
       setShapeChanged (TRUE) ;
     }
 }

 // Are the histograms being merged as part of a join operation
 inline NABoolean
 ColStats::isAJoinRelatedMerge(MergeType mergeMethod) const
 {
   if((mergeMethod == INNER_JOIN_MERGE) ||
      (mergeMethod == SEMI_JOIN_MERGE) ||
      (mergeMethod == ANTI_SEMI_JOIN_MERGE) ||
      (mergeMethod == OUTER_JOIN_MERGE))
      return TRUE;
   else
     return FALSE;
 }

 // -----------------------------------------------------------------------
 //  Statistics List
 //    A collection of column statistics.  Column statistics
 //    belong to the same set if used for the same equivalence "group"
 //    (as defined by Cascades).
 // -----------------------------------------------------------------------
 class StatsList : public SHPTR_LIST(ColStatsSharedPtr)
 {
 public:
   StatsList(NAMemory* h,CollIndex initLen =0)
     : heap_(h),
     SHPTR_LIST(ColStatsSharedPtr) (h,initLen),
     groupUecColumns_(h),
     groupUecValues_(h),
     groupMCSkewedValueLists_(h)
   {}

   // ---------------------------------------------------------------------
   //  Virtual Destructor
   // ---------------------------------------------------------------------
   virtual ~StatsList();
   virtual void deepDelete();

   // ---------------------------------------------------------------------
   //  Is list empty?
   // ---------------------------------------------------------------------
   NABoolean is_empty () const { return entries() == 0; }

   void display () const;

   void print (FILE *f = stdout,
 	      const char * prefix = DEFAULT_INDENT,
 	      const char * suffix = "",
               CollHeap *c=NULL, char *buf=NULL) const;

   void trace (FILE *f, NATable* table) const;

   //Copy constructor with user specified heap
   StatsList(const StatsList& other, NAMemory * heap):
               heap_(heap),
               SHPTR_LIST(ColStatsSharedPtr) (other,heap),
               groupUecColumns_(other.groupUecColumns_,heap),
               groupUecValues_(other.groupUecValues_,heap),
               groupMCSkewedValueLists_(other.groupMCSkewedValueLists_,heap)
   {};
   // Copy constructor
   StatsList(const StatsList& other):
 				heap_(other.heap_),
 				SHPTR_LIST(ColStatsSharedPtr) (other,heap_),
 				groupUecColumns_(other.groupUecColumns_,heap_),
 				groupUecValues_(other.groupUecValues_,heap_),
 				groupMCSkewedValueLists_(other.groupMCSkewedValueLists_,heap_)
 	{};

   StatsList& operator=(const StatsList& list);

   //reduce the number of histogram intervals in the histograms
   //referenced by the ColStats that are referenced by this StatsList object.
   //The reduction criterion depends on parameters invokedFrom,
   //and reductionCriterion
   void reduceNumHistInts(Source invokedFrom,
                          Criterion reductionCriterion);

   //reduce the number of histogram intervals for histograms
   //referenced by the ColStats that make up this StatsList
   void reduceNumHistIntsAfterFetch(NATable& table);

   // deepCopy()
   // Makes a deep copy of all of its members
   void deepCopy(const StatsList & other, NAMemory * heap);

   // insertByPosition()
   // insert the histograms that have reference to 'position' column
   void insertByPosition(const StatsList & other, const Lng32 position,
                         SET(ColStats*) &dupList);

   // insertCompressedCopy()
   // Takes in full histogram but insert it in compressed form
   ColStatsSharedPtr insertCompressedCopy(const StatsList & realStat,
 						const Lng32 position,NABoolean state);

   // insertDeepCopyList()
   // makes a deep copy from the other
   void insertDeepCopyList(const StatsList & other);

   //Returns a reference to a ColStats object representing
   //single column statistics for the column identified by
   //parameter position.
   ColStatsSharedPtr getSingleColumnColStats(const Lng32 position);

   // returns the UEC count from the histogram identified by the parameter
   //position. Position here is the position of the column in the table
   CostScalar getSingleColumnUECCount(const Lng32 position) const;

   // return count of single column histograms (include fake histograms)
   Int32 getSingleColumnCount() const;

   // return count of multi-column histograms
   Int32 getMultiColumnCount() const;

   // return true if all fake histograms or is empty
   NABoolean allFakeStats() const;

   NAMemory * heap() const { return heap_; }

 private:
   NAMemory * heap_;

 public:

   // these two data members are intentionally left public; otherwise, we
   // need accessor functions, which would be ridiculous for such a small
   // and infrequently-used class

   // these two data members hold the groupUec information that we read in
   // from the system catalogs; we can't convert this data to a
   // MultiColumnUecList until we get to TableDesc::getStatColumns(),
   // because before then we can't do the necessary (NAColumn -> ValueId)
   // mapping
   LIST(NAColumnArray) groupUecColumns_ ;
   LIST(CostScalar)    groupUecValues_ ;
   LIST(MCSkewedValueList)    groupMCSkewedValueLists_ ;
 }; //StatsList


 // -----------------------------------------------------------------------
 //  Skewed Value List
 //    A list of skewed values.
 // -----------------------------------------------------------------------
 class SkewedValueList : public LIST(EncodedValue)
 {
 public:
   SkewedValueList(NAType* nt, NAMemory* h, CollIndex initLen = 0) :
     natype_(nt),
     heap_(h),
     isNullInList_(FALSE),
     isNullSkewed_(FALSE),
     LIST(EncodedValue)(h,initLen),
     finalHashComputed_(TRUE) {};

   SkewedValueList(NAMemory* h, CollIndex initLen = 0) :
     natype_(NULL),
     heap_(h),
     isNullInList_(FALSE),
     isNullSkewed_(FALSE),
     LIST(EncodedValue)(h,initLen),
     finalHashComputed_(TRUE) {};

    ~SkewedValueList() {};

    // insert a skewed value in decending order.
    void insertInOrder(const EncodedValue&);

    // get the list in human-readable form: [v1, v2, ..., vn]
    const NAString getText() const;
    const NAType* getNAType() const { return natype_; };

    NABoolean getIsNullInList() const
    {
       return isNullInList_;
    }

    void setIsNullInList(NABoolean v)
    {
       isNullInList_ = v;
    }

    NABoolean getIsNullSkewed() const
    {
       return isNullSkewed_;
    }

    void setIsNullSkewed(NABoolean v)
    {
       isNullSkewed_ = v;
    }

     NABoolean hasOnlyNonSkewedNull() const
     {
       return (!getIsNullSkewed() &&
               getIsNullInList() &&
               entries()==1);
     }

    NABoolean needToComputeFinalHash() const { return finalHashComputed_; };
    void setComputeFinalHash(NABoolean x) { finalHashComputed_ = x; };

 private:
   NAType*  natype_;
   NAMemory * heap_;
   // TRUE if NULL is in the list regardless of whether it is
   // skweded or no
   NABoolean isNullInList_;
   // TRUE if NULL is  skewed value
   NABoolean isNullSkewed_;

   NABoolean finalHashComputed_;
 };


 #endif