/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
*****************************************************************************
*
* File: ColStatDesc.cpp
* Description: Column Statistics Descriptor
* Created: June 7, 1995
* Language: C++
*
*
*
*****************************************************************************
*/
// -----------------------------------------------------------------------
#define SQLPARSERGLOBALS_FLAGS // must precede all #include's
#include "ColStatDesc.h"
#include "Sqlcomp.h"
#include "ItemColRef.h"
#include "ItemOther.h"
#include "ItemFunc.h"
#include "Cost.h" /* for lookups in defaults table */
#include "Analyzer.h"
#include "../exp/exp_ovfl_ptal.h" //check for overflow & underflow
#include "CompException.h"
#include "ItemLog.h" // for like predicates
#include "hs_globals.h" // for ustat automation setting
#include "hs_cli.h" // ustat automation, insert empty histograms
#include "hs_log.h" // ustat log
#include "SqlParserGlobals.h"
#include "CmpDescribe.h"
#ifdef DEBUG
#define HISTWARNING(x) fprintf(stdout, "Histogram optimizer warning: %s\n", x);
#else
#define HISTWARNING(x)
#endif
//#define MONITOR_SAMEROWCOUNT
// This is an arbitrary constant; perhaps someday it will end up in the
// defaults table.
//
// HIST_MAX_IN_LIST_MEMBERS: the largest number of IN-list members for
// which we attempt to do exact histogram-manipulation; for IN-lists of
// cardinality greater than this constant, we do some massive
// simplifications (see CSDL::estimateCardinality)
const Int32 HIST_MAX_IN_LIST_MEMBERS = 40 ;
// -----------------------------------------------------------------------
// Methods on ColStatDesc class
// -----------------------------------------------------------------------
ColStatDesc::ColStatDesc (const ColStatsSharedPtr& stats,
const ValueIdList& columnList,
NAMemory * h) :
nonVegEquals_(h)
{
// Set up a reference to the ColStats structure
colStats_ = stats;
// ---------------------------------------------------------------------
// Now, generate the valueidlist for this set of column statistics.
// Also, generate the initial mergeState_ for this stats' column.
// Initially, the state is that only the current column has been merged.
// ---------------------------------------------------------------------
const ValueId & id = columnList[stats->getStatColumns()[0]->getPosition()];
column_ = id ;
VEGcolumn_ = column_;
mergeState_.insert(column_);
fromInnerTable_ = FALSE;
// use default constructor for nonVegEquals, and appliedPreds_
// underlying histogram is not modified yet
modified_ = FALSE;
inputCard_ = 1.0;
}
// Construct a ColStatDesc given a single ValueId and a pointer
// to a ColStats object. This constructor does not assume that
// the ColStats object contains a NAColumnList. Used when constructing
// ColStats for generated columns that are not based on BASECOLS,
// such as with TRANSPOSE.
//
ColStatDesc::ColStatDesc(const ColStatsSharedPtr& stats,
const ValueId & column, NAMemory * h)
: nonVegEquals_(h)
{
// Set up a reference to the ColStats structure
colStats_ = stats;
// ---------------------------------------------------------------------
// Set up the column, VEGColumn and MergeState.
// ---------------------------------------------------------------------
column_ = column ;
VEGcolumn_ = column;
mergeState_.insert(column) ;
fromInnerTable_ = FALSE;
// underlying histogram is not modified yet
modified_ = FALSE;
inputCard_ = 1.0;
}
void
ColStatDesc::copy (const ColStatDesc& other)
{
column_ = other.column_;
VEGcolumn_ = other.VEGcolumn_;
mergeState_ = other.mergeState_;
nonVegEquals_ = other.nonVegEquals_;
fromInnerTable_ = other.fromInnerTable_;
appliedPreds_ = other.appliedPreds_;
colStats_ = other.colStats_;
inputCard_ = other.inputCard_;
modified_ = FALSE;
}
void
ColStatDesc::mapUpAndCopy (const ColStatDesc& other, ValueIdMap &map)
{
copy(other);
map.mapValueIdUp(column_, other.column_);
map.mapValueIdUp(VEGcolumn_, other.VEGcolumn_);
// if the map only maps the column or VEGcolumn but not both, then
// map both to the same value id
if (column_ == other.column_ && VEGcolumn_ != other.VEGcolumn_)
column_ = VEGcolumn_;
if (VEGcolumn_ == other.VEGcolumn_ && column_ != other.column_)
VEGcolumn_ = column_;
// rewrite applied predicates
if (!appliedPreds_.isEmpty() &&
(column_ != other.column_ || VEGcolumn_ != other.VEGcolumn_))
{
appliedPreds_.clear();
map.rewriteValueIdSetUp(appliedPreds_, other.appliedPreds_);
}
}
void
ColStatDesc::deallocate()
{
colStats_ = NULL;
}
// -----------------------------------------------------------------------
// ColStatDesc::getColStatsToModify
//
// If this is the first time that the underlying colstats is being
// modified, then make a copy first. Otherwise, just return the reference
// to colStats.
// -----------------------------------------------------------------------
ColStatsSharedPtr
ColStatDesc::getColStatsToModify()
{
if (NOT isModified())
{
colStats_ = ColStatsSharedPtr(new (HISTHEAP)
ColStats (*colStats_, HISTHEAP));
modified_ = TRUE;
}
return colStats_;
}
// argh! pow(x,0.0) can return not-a-number on some platforms!
//
// This version of pow has been written specially to do the right thing
// for its call in the following function ; use of it by any other method
// is probably not a good idea.
double mypow (double base, double exp)
{
if ( exp == 0 )
return 1 ; // duh!
else if ( base <= 0 )
return 0 ; // this isn't obvious ... hmmmm ...
else
return pow(base,exp) ;
}
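// Illustrative boundary cases this wrapper guards against (a sketch,
// not exercised anywhere in this file):
//   mypow(0.0, 0.0)  -> 1   (the exp == 0 branch)
//   mypow(-2.0, 0.5) -> 0   (plain pow would return NaN here)
//   mypow(2.0, 3.0)  -> 8   (delegates to pow)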
// -----------------------------------------------------------------------
// Historically we've reduced UEC values by the same amount as rowcounts
// when a predicate is applied; but this is extremely foolish, as UEC
// values generally do not reduce very rapidly as rowcounts go down (e.g.,
// a "downsizing" of 50% of employees (reduction of .5) is not likely to
// reduce the UEC of the genders of employees by the same .5!) In general,
// we expect UEC to go down only when rowcount goes down a lot.
//
// Making this a member function so that anyone else who wants to use it
// will be able to "find" it more easily -- and so it doesn't pollute the
// global namespace.
// -----------------------------------------------------------------------
CostScalar
ColStatDesc::calculateCorrectResultUec( const CostScalar & baseRows,
const CostScalar & newRows,
const CostScalar & baseUec)
{
// Sanity checks.
CCMPASSERT( baseRows.isGreaterOrEqualThanZero() );
CCMPASSERT( newRows.isGreaterOrEqualThanZero() );
CCMPASSERT( baseUec.isGreaterOrEqualThanZero() );
// we start with a simple sanity check
if ( baseRows.isZero() || newRows.isZero() || baseUec.isZero() )
return csZero; // rows(0) ==> uec(0)
// now let's get started ... here's a good zeroth approximation
CostScalar firstApprox = baseUec ;
// as a first approximation, we limit uec to be (at most) the new rowcount
if ( firstApprox > newRows )
firstApprox = newRows ; // reduce to the new rowcount
// be careful to not reduce uec if the rowcount is increasing (or
// remaining constant)!
if ( newRows >= baseRows )
{
return firstApprox ; // this is equal to MIN (baseUec, newRows)
}
// ------------------------------------------------------------------------
// The UEC reduction formula is derived as follows:
//
// u boxes
// N balls
//
// Balls are distributed among the u boxes.
//
// The distribution was performed randomly, except that the occupancy
// of any box must be greater than 0. This was done by putting 1 ball
// in each of the u boxes and distributing the remaining N-u balls
// randomly (with equal probabilities for each box).
//
// Now apply an orthogonal selection with reduction factor R; i.e., remove
// (1-R)*N balls randomly (each ball has same chance of being removed,
// i.e., (1-R))
//
// The probability that a box is empty is
// = (Prob. that the first ball was removed)
// * (Prob. that all other balls in the box were removed)
//
// The first term is simply (1-R)
// The second term can be shown to be (1-R/u)^(N-u)
// which can be approximated to exp(-R(N-u)/u) for N-u >> 1
//
// Consequently the result UEC (or # non empty boxes) is
// UEC = u*(1-(1-R)*(1-R/u)^(N-u))
// = u*(1-(1-R)*exp(-R(N-u)/u)) N-u >> 1 ----(1)
//
// As for the linear count estimate, it does not take into account
// the requirement that initial box occupancy > 0. Consequently
// it performs poorly when the values of N and u are close, which
// is a very common case.
// -----------------------------------------------------------------------
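// A worked illustration of formula (1), with purely illustrative numbers:
// N = 1000 rows, u = 100 distinct values, and a predicate keeping 10% of
// the rows (R = 0.1) gives
//   UEC = 100 * (1 - 0.9 * exp(-0.1 * 900 / 100))
//       = 100 * (1 - 0.9 * exp(-0.9))
//       ~ 100 * (1 - 0.9 * 0.4066) ~ 63.4
// i.e., removing 90% of the rows removes only about 37% of the UEC.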
//NB: if we reach here, baseRows > newRows
double R = ((CostScalar) newRows / baseRows).value() ;
double N = baseRows.value() ;
double u = baseUec.value() ;
CostScalar secondApprox ;
// we fudge >> to mean greater than 10; this should be fine
if ( N-u > 10 )
secondApprox = u * (1-(1-R) * exp(-R*(N-u)/u)) ;
else
secondApprox = u * (1-(1-R) * mypow(1-R/u,N-u)) ;
// sanity check
// if ( secondApprox.getValue() <= csZero.getValue() || secondApprox > firstApprox )
if ( NOT secondApprox.isGreaterThanZero() OR secondApprox > firstApprox )
secondApprox = firstApprox ;
return secondApprox ;
}
// -----------------------------------------------------------------------
// ColStatDesc::applySel
//
// Apply the provided selectivity to the underlying column statistics
// (ColStats structure).
// -----------------------------------------------------------------------
void
ColStatDesc::applySel( const CostScalar & selectivity )
{
if ( selectivity == csOne )
return ; // nothing to do
ColStatsSharedPtr colStats = getColStatsToModify() ;
// --------------------------------------------------------------------
// when we "scale up" a histogram, it loses its uniqueness
if ( selectivity.isGreaterThanOne() /* > 1 */)
{
colStats->setUnique( FALSE );
}
// --------------------------------------------------------------------
const CostScalar & baseUec = colStats->getTotalUec();
const CostScalar & baseRowcount = colStats->getRowcount();
const CostScalar newRowcount = MIN_ONE_CS(baseRowcount * selectivity);
const CostScalar & originalBaseRowCount = colStats->getBaseRowCount();
CostScalar baseRowCountForUec = baseRowcount;
if ((originalBaseRowCount < baseRowcount) &&
(originalBaseRowCount != csMinusOne))
{
baseRowCountForUec = originalBaseRowCount;
colStats->setBaseRowCount(-1);
}
CostScalar newTotalUec, uecSelectivity;
if ( baseUec.isZero() ) // avoid div-by-zero!
{
newTotalUec = csZero;
uecSelectivity = csZero;
}
else if ( colStats->isUnique() )
{
// If this is a UNIQUE column, UEC == rowcount
newTotalUec = newRowcount;
uecSelectivity = selectivity;
}
else
{
newTotalUec = calculateCorrectResultUec( baseRowCountForUec,
newRowcount,
baseUec);
uecSelectivity = newTotalUec / baseUec;
}
// If the row count is >= one, then uplift the UEC to at least one.
if ((newRowcount >= csOne) && (newTotalUec < csOne))
newTotalUec = csOne;
colStats->setRedFactor ( colStats->getRedFactor() * selectivity );
colStats->setUecRedFactor( colStats->getUecRedFactor() * uecSelectivity );
colStats->setRowsAndUec ( newRowcount, newTotalUec );
}
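// A sketch of the arithmetic above, with illustrative numbers: applying
// selectivity 0.1 to a non-unique column with rowcount 10,000 and UEC 500
// gives newRowcount = 1,000, while newTotalUec comes from
// calculateCorrectResultUec(10000, 1000, 500) ~ 433 -- far more than the
// naive 500 * 0.1 = 50, since UEC shrinks much more slowly than rowcount.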
// ----------------------------------------------------------------------
// ColStatDesc::applySelIfSpecifiedViaHint
//
// This helper method is called by various cardinality estimation
// methods to adjust cardinality based on user-specified selectivity.
// ----------------------------------------------------------------------
void
ColStatDesc::applySelIfSpecifiedViaHint(ItemExpr * pred, const CostScalar & oldRowcount)
{
// If user specified selectivity for this predicate, we need to make
// adjustment in reduction to reflect that.
if(pred->isSelectivitySetUsingHint())
{
ColStatsSharedPtr colStats = getColStats();
CostScalar selAdjustment(oldRowcount * pred->getSelectivityFactor()/getColStats()->getRowcount());
if(selAdjustment > csZero)
{
applySel(selAdjustment);
colStats->setSelectivitySetUsingHint();
}
}
}
// ----------------------------------------------------------------------
// ColStatDesc::synchronizeStats
//
// Change the rowcount of THIS to match the specified newRowcount;
// Apply the change to histograms by changing the associated RedFactor.
// Change the UEC of THIS in proportion to the change in UEC characterized
// by baseUec and newUec. Again, apply this to histograms by changing
// the associated uecRedFactor. Do not allow the resulting UEC to exceed
// the resulting rowcount.
//
// The third parameter is optional; it is used by some functions which
// are certain that under no circumstances should the UEC be allowed to
// be changed. Otherwise, UEC is reduced as per the function above.
//
// NOTE how similar this function is to CSD::applySel() above. In most
// cases, when both the old and new rowcounts are known (e.g., in cases
// when we're not just applying some selectivity), this function should
// be used --> except, of course, in cases when we need to make sure to
// reduce the uec to be at most 1.
// ----------------------------------------------------------------------
void
ColStatDesc::synchronizeStats( const CostScalar & baseRowcount,
const CostScalar & newRowcount,
SynchSpecialFlag specialFlag )
{
CCMPASSERT( baseRowcount.isGreaterOrEqualThanZero() );
CCMPASSERT( newRowcount.isGreaterOrEqualThanZero() );
if ( getColStats()->getRowcount() == newRowcount )
return ; // nothing to do
// The first check is to see if the row count can be uplifted to one, if
// it is not zero but less than one. This solves the problem of
// UECs less than 1, which can reach 0 and cause overflow - RV
CostScalar newRowCountCorrected;
newRowCountCorrected = MIN_ONE_CS(newRowcount);
// now use this adjusted newRowCount for all further calculations
// it's really not clear what the row reduction should be when the baseRowcount
// starts out as zero ...
// avoid division by zero
CostScalar rowSelectivity =
(baseRowcount.isZero()) ? csOne : newRowCountCorrected / baseRowcount;
ColStatsSharedPtr colStats = getColStatsToModify();
// get base row count of the histogram, to see if that can be used to
// calculate the Uec reduction RV
const CostScalar &originalBaseRowCount = colStats->getBaseRowCount();
// to get the maximum reduction, use the lower row count
CostScalar baseRowCountForUec = baseRowcount;
if ((originalBaseRowCount < baseRowcount) &&
(originalBaseRowCount != csMinusOne))
{
baseRowCountForUec = originalBaseRowCount;
colStats->setBaseRowCount(-1);
}
if ( colStats->getRowcount() != baseRowcount )
{
CostScalar cardDiff = colStats->getRowcount() - baseRowcount;
if (_ABSOLUTE_VALUE_(cardDiff.value()) > 1.0)
{
// unusual case: but if it does happen, fudge the synchronization
// results based upon how far our numbers are from what is expected.
// Because of the way we round our cardinalities, a difference of one
// row can show up unintentionally. Hence we fudge the
// synchronization results only if the difference is more than one;
// else we go by the reduction of newRowcount / oldRowcount,
// because that is what it should be.
// There is an extra != check to avoid computing ABSOLUTE_VALUE
// in most cases.
rowSelectivity =
(baseRowcount.isZero()) ? csOne : colStats->getRowcount() / baseRowcount;
}
}
// --------------------------------------------------------------------
// when we "scale up" a histogram, it loses its uniqueness
if ( rowSelectivity > csOne )
{
colStats->setUnique( FALSE );
}
// --------------------------------------------------------------------
const CostScalar & baseUec = colStats->getTotalUec();
CostScalar newTotalUec, uecSelectivity;
NABoolean rowCountIsOne = FALSE;
if ( baseUec.isZero() ) // avoid div-by-zero
{
newTotalUec = csZero;
uecSelectivity = csZero;
}
else if ( specialFlag == DO_NOT_REDUCE_UEC )
{
// Sometimes the calling function knows that the UEC should not change.
// We still want to calculate uecSelectivity correctly.
newTotalUec = MINOF( baseUec, newRowCountCorrected );
uecSelectivity = newTotalUec / baseUec; // usually 1
}
else if ( colStats->isUnique() )
{
// If it's a UNIQUE column, then UEC == ROWCOUNT.
if ( specialFlag == SET_UEC_TO_ONE )
{
// In this case, both uec & rowcount should be 1.
rowCountIsOne = TRUE;
rowSelectivity =
(baseRowcount.isZero()) ? csOne : csOne / baseRowcount;
}
newTotalUec = rowCountIsOne ? csOne : newRowCountCorrected ;
uecSelectivity = rowSelectivity ;
}
else if ( specialFlag == SET_UEC_TO_ONE )
{
// Sometimes the calling function knows that
// resulting UEC should be at most 1.
newTotalUec = MINOF( baseUec, csOne );
// We still want to calculate uecSelectivity correctly.
newTotalUec = MINOF( newTotalUec, newRowCountCorrected );
uecSelectivity = newTotalUec / baseUec;
}
else // the usual case
{
newTotalUec = calculateCorrectResultUec( baseRowCountForUec,
newRowCountCorrected,
baseUec);
uecSelectivity = newTotalUec / baseUec;
}
// If the row count is at least one, then uplift the UEC to at least one.
if ((newRowCountCorrected >= csOne) && (newTotalUec < csOne))
newTotalUec = csOne;
colStats->setRedFactor ( colStats->getRedFactor() * rowSelectivity );
colStats->setUecRedFactor( colStats->getUecRedFactor() * uecSelectivity );
colStats->setRowsAndUec ( ( rowCountIsOne ? csOne : newRowCountCorrected ),
newTotalUec );
}
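// A hypothetical usage sketch (caller and variable names are illustrative):
//   csd->synchronizeStats(oldRows, newRows);
//       // UEC reduced per calculateCorrectResultUec
//   csd->synchronizeStats(oldRows, newRows, DO_NOT_REDUCE_UEC);
//       // UEC kept, capped at the new rowcount
// where the optional third argument is the SynchSpecialFlag described above.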
// -----------------------------------------------------------------------
// ColStatDesc::modifyStats
//
// Given a supported predicate, apply the effect of the predicate on the
// ColStats structure, as well as its corresponding histogram. This
// method expects the unary predicates
// IS [NOT] NULL
// IS [NOT] UNKNOWN
// and binary predicates of the form
// COLUMN <op> <constant>
// <constant> <op> COLUMN
// This routine also checks for doable, simple, special, cases like
// column_a <op> column_a
//
// Calls these functions to do the work of predicate application:
//
// ColStats::modifyStats
// ColStats::simplestPreds
//
// return value: semantic =~= "everything's OK"
// -------------
// FALSE if one of the following:
// 1. none of the above predicate cases applies
// 2. the identified column does not appear in the column list
// 3. the CSD's histogram is NULL
//
// TRUE if none of the three conditions for FALSE are met,
// or if the predicate has already been applied
// -----------------------------------------------------------------------
NABoolean
ColStatDesc::modifyStats( ItemExpr *pred, CostScalar & newRowcount,
CostScalar *maxSelectivity )
{
const ValueId & predValueId = pred->getValueId();
OperatorTypeEnum op = pred->getOperatorType();
ItemExpr * lhs = pred->child(0);
ItemExpr * rhs = NULL;
ConstValue * constant = NULL;
NABoolean negate = FALSE;
if ( pred->getArity() > 1 )
{
rhs = pred->child(1);
constant = rhs->castToConstValue( negate );
if(constant == NULL)
{
if (rhs->getOperatorType() == ITM_VEG_REFERENCE)
{
const VEG * veg = ((VEGReference *)rhs)->getVEG();
ValueId constId = veg->getAConstant();
if(constId != NULL_VALUE_ID)
constant = constId.getItemExpr()->castToConstValue( negate );
}
else
{
if ((op == ITM_EQUAL) &&
(rhs->getOperatorType() == ITM_CACHE_PARAM) )
{
ItemExpr * constantExpr = ((ConstantParameter *)rhs)->getConstVal();
if (constantExpr == NULL)
return FALSE;
constant = constantExpr->castToConstValue(negate);
} // cache_param
} // not a veg_reference
} // constant == NULL
} //arity > 1
// get writable copies of the ColStats structure
ColStatsSharedPtr colStats = getColStatsToModify();
newRowcount = colStats->getRowcount();
// the stand-alone column should always be on the lhs
// if the predicate being applied is IS NULL, then do not skip the
// instantiate_null expression on the lhs.
NABoolean digIntoInstantiateNull = TRUE;
if ((op == ITM_IS_NULL) || (op == ITM_IS_NOT_NULL))
digIntoInstantiateNull = FALSE;
if ( (lhs->getOperatorType() != ITM_VEG_REFERENCE) &&
(constant || (op == ITM_EQUAL) || (op == ITM_NOT_EQUAL) ||
(op == ITM_IS_NULL) || (op == ITM_IS_NOT_NULL)))
{
lhs = lhs->getLeafValueIfUseStats(digIntoInstantiateNull);
}
ValueId vegCol = getVEGColumn();
if (lhs->getValueId() != vegCol )
{
// if the valueIds match, then we have found the histogram for the
// child; proceed with applying the predicate.
// If the valueIds are not equal, then see if it is a VEG region
// (defined by VEG_REFERENCE). If it is, dig into the region to
// get VEG references. This is required especially for full outer joins,
// where the join expression forms a VEG region of its own with
// VEG references from the two children. However, the histogram for the join
// is still identified by the VEG reference of the left child
if (lhs->getOperatorType() == ITM_VEG_REFERENCE)
{
ValueIdSet cols;
lhs->findAll(ITM_VEG_REFERENCE, cols, TRUE, TRUE);
if (!cols.contains(vegCol))
return FALSE;
}
else
return FALSE;
}
// three kinds of preds that we can handle:
// 1. constant is not NULL (i.e., of form <constant> op <col>)
// 2. unary predicate
// 3. binary predicate on same column
// ==> if none of these is TRUE, then we can't evaluate this predicate
if ( NOT ( constant != NULL
OR pred->getArity() == 1
OR ( pred->getArity() == 2
AND lhs->getValueId() == rhs->getValueId()
)
)
)
{
return FALSE; // Correct column, but can't evaluate predicate....
}
// OK, all's well, now we evaluate the predicate
CostScalar rowcount = csZero;
CostScalar uec = csZero;
const CostScalar & origRowcount = colStats->getRowcount();
const CostScalar & origUec = colStats->getTotalUec();
// These will be used later to check for the boundary condition for
// range predicates
const EncodedValue maxValue = colStats->getMaxValue();
const EncodedValue minValue = colStats->getMinValue();
// get the encoded value of the constant if any
EncodedValue val (UNINIT_ENCODEDVALUE) ;
if (constant != NULL)
// get the encoded format for the constant
val = EncodedValue (constant, negate);
// check: if we've already applied this predicate, don't do it again!
if ( isPredicateApplied( predValueId ) )
{
newRowcount = origRowcount;
return TRUE;
}
// extrapolate the histogram if the value being looked for lies outside the histogram boundaries;
// no need to extrapolate if the predicate being applied is less-than or less-than-or-equal
if (colStats->getStatColumns()[0]->getType()->getTypeQualifier() == NA_DATETIME_TYPE
&& val >= maxValue
&& !colStats->isOrigFakeHist()
&& (op == ITM_EQUAL || op == ITM_GREATER || op == ITM_GREATER_EQ)
&& maxSelectivity == NULL
&& constant != NULL)
colStats->adjustRowcountforRollingColumns(constant);
// OK, predicate has NOT already been applied
// remember whether or not a histogram is 'fake' prior to applying the
// given predicate.
NABoolean isaFakeHistogram = colStats->isFakeHistogram() ;
if ( (pred->getArity() == 2) &&
(lhs->getValueId() == rhs->getValueId()) )
{
if (maxSelectivity == NULL)
colStats->simplestPreds( pred );
}
else
{
colStats->modifyStats( pred, maxSelectivity );
}
// maxSelectivity computation is done
if (maxSelectivity) {
newRowcount = colStats->getRowcount();
return TRUE;
}
rowcount = colStats->getRowcount();
uec = colStats->getTotalUec();
// ------------------------------------------------------------------
// Lastly, if this predicate was applied to a 'fake' histogram
// AND the predicate did not eliminate all of the rows,
// AND no 'similar' predicate has already been applied.
// ==> Alter this predicate's impact on the total RowCount & UEC to
// match that of a 'default' predicate. In cases of multiple
// applications of 'similar' predicates, any shape-changing impact that
// occurs above will continue to have its effect, but no actual
// reduction in rowcount will occur.
//
// All changes are accomplished via alterations to appropriate reduction
// factors.
// ------------------------------------------------------------------
// ------------------------------------------------------------------
// When we're certain that we should end up with zero rows, we set the
// histogram accordingly:
// 1. the max-set-by-pred and min-set-by-pred flags are TRUE;
// 2. the histogram has one interval with bounds UNINIT_ENCODEDVALUE; and
// 3. the max/min bounds are set to UNINIT_ENCODEDVALUE
// Unless these conditions are met, if we ever end up with zero
// rows, we want to undo what we did and apply default selectivity
// (instead of the overly-selective predicate that we just applied).
// ------------------------------------------------------------------
HistogramSharedPtr hist = colStats->getHistogramToModify() ;
if ( rowcount.isZero()
AND NOT ( hist->entries() == 1
AND colStats->isMaxSetByPred()
AND colStats->isMinSetByPred()
AND colStats->getMaxValue() == UNINIT_ENCODEDVALUE
AND colStats->getMinValue() == UNINIT_ENCODEDVALUE
)
)
{
// At this point we're not going to preserve any multiple-interval
// shape of the histogram, because we're grasping at straws in our
// attempt to prevent a zero-overall rowcount. This is a kludge to
// prevent undesired zero rowcounts (we operate under the assumption
// that *all* zero rowcounts are unwanted). The easiest/best way to
// proceed is to populate a single-interval histogram with the
// original rows & uec, declare this a fake histogram, and then
// apply default selectivity (see next if-clause below) to come up with
// a more reasonable reduction.
hist->condenseToSingleInterval();
colStats->setIsCompressed(TRUE);
// first, set the aggregate values
colStats->setRowsAndUec ( origRowcount, origUec );
colStats->setRedFactor ( csOne );
colStats->setUecRedFactor( csOne );
// Set first interval's rowcount and uec.
hist->getFirstInterval().setRowsAndUec( origRowcount, origUec );
// from this point forward, we're going to consider this a fake histogram
colStats->setFakeHistogram();
isaFakeHistogram = TRUE;
// finally, reset the rowcount/uec values
rowcount = colStats->getRowcount();
uec = colStats->getTotalUec();
}
// after we have applied the predicate on the histogram (modified the
// interval boundaries to reflect the predicate), and taken care of
// special conditions, we check to see if the predicate applied
// is a derivative of LIKE, or whether a similar predicate has already
// been applied to that histogram. If so, we don't want to apply the
// reduction twice on that histogram, so we bring the
// rowcount and the UEC back to what they were before the predicate was
// applied. Hence, at the end, we have the boundaries
// of the histogram intervals reflecting the two predicates,
// and the rows and UEC reflecting one predicate. For LIKE predicates
// this ensures that the final selectivity after applying both range
// predicates equals the default selectivity of LIKE predicates
//
if (derivOfLikeAndSimilarPredApp(pred) )
{
synchronizeStats( rowcount, origRowcount, DO_NOTHING_SPECIAL );
}
else
{
if (isaFakeHistogram
AND NOT ( hist->entries() == 1
AND colStats->isMaxSetByPred()
AND colStats->isMinSetByPred()
AND colStats->getMaxValue() == UNINIT_ENCODEDVALUE
AND colStats->getMinValue() == UNINIT_ENCODEDVALUE
)
)
{
CostScalar defaultRowcount = origRowcount;
if (!isSimilarPredicateApplied(pred->getOperatorType()))
{
// if it is a fake histogram, then for an equality predicate, set the rowcount to
// the sqrt of the original rowcount
if (op == ITM_EQUAL)
{
if (colStats->isUnique())
defaultRowcount = 1;
else
defaultRowcount = ceil(sqrt(origRowcount.getValue()));
}
else
if (op == ITM_NOT_EQUAL)
{
defaultRowcount.minCsOne();
if (colStats->isUnique())
defaultRowcount -= 1;
else
defaultRowcount = origRowcount - ceil(sqrt(origRowcount.getValue()));
}
else
defaultRowcount = origRowcount * pred->defaultSel();
}
SynchSpecialFlag specialFlag = DO_NOTHING_SPECIAL;
// handle the operators which set UEC to one
if ( op == ITM_IS_NULL ||
op == ITM_IS_UNKNOWN ||
op == ITM_EQUAL )
{
// the resulting UEC after this operation should be at most one
specialFlag = SET_UEC_TO_ONE;
}
synchronizeStats( rowcount, defaultRowcount, specialFlag );
}
}
addToAppliedPreds( predValueId ); // in this case: add when done
// Now do the lower bound check for range predicates
if ( !isaFakeHistogram && (val != UNINIT_ENCODEDVALUE) )
{
if ( op == ITM_LESS || op == ITM_LESS_EQ ||
op == ITM_GREATER || op == ITM_GREATER_EQ)
{
double baseRowcount = colStats->getBaseRowCount().getValue();
double baseUec = (colStats->getUecBeforePreds().getValue());
if (baseUec < 1.0) baseUec = 1.0;
double minRowcount = baseRowcount/baseUec;
if (colStats->getRowcount() < minRowcount)
synchronizeStats( rowcount, minRowcount );
}
}
newRowcount = colStats->getRowcount();
return TRUE;
} // ColStatDesc::modifyStats
// This method merges the two histograms from the same table
NABoolean
ColStatDesc::mergeColStatDescOfSameTable(ColStatDescSharedPtr &rightColStats,
OperatorTypeEnum opType)
{
NABoolean checkForMergeFromScan = FALSE;
if ((opType == REL_SCAN) || (opType == ITM_OR) || (opType == ITM_AND) )
checkForMergeFromScan = TRUE;
if ((!checkForMergeFromScan) ||
(CmpCommon::getDefault(COMP_BOOL_74) == DF_OFF) )
return FALSE;
ColStatsSharedPtr rootColStats = getColStatsToModify();
CostScalar leftUec = rootColStats->getTotalUec();
CostScalar rightUec = rightColStats->getColStats()->getTotalUec();
// In exceptional cases where one of the columns has been reduced by another predicate,
// for example upper(col1) = upper(col2) and col1 = 'B', we should take the lower rowcount.
// Without the upper expression, we would get a VEG here, which would be handled by
// the applyVEGPred method, and control would not come here
CostScalar newRowcount = MINOF(rootColStats->getRowcount(), rightColStats->getColStats()->getRowcount());
CostScalar minUec = MINOF(leftUec, rightUec);
// use HIST_NO_STATS_UEC to compute selectivity for equality predicates
// from the same table (T1.a = T1.b)
double selectivityForPredEqual = (1.0/(CURRSTMT_OPTDEFAULTS->defNoStatsUec()) );
newRowcount = (newRowcount * selectivityForPredEqual).minCsOne();
CostScalar newUec = MINOF(minUec, newRowcount);
HistogramSharedPtr hist = rootColStats->getHistogramToModify() ;
hist->condenseToSingleInterval();
rootColStats->setIsCompressed(TRUE);
// Set first interval's rowcount and uec.
hist->getFirstInterval().setRowsAndUec( newRowcount, newUec );
// first, set the aggregate values
rootColStats->setRowsAndUec ( newRowcount, newUec );
if(opType == REL_SCAN)
rootColStats->setBaseUec(newUec);
rootColStats->setRedFactor ( csOne );
rootColStats->setUecRedFactor( csOne );
rootColStats->setRecentJoin(TRUE);
appliedPreds().insert( rightColStats->getAppliedPreds() );
mergeState().insert(rightColStats->getMergeState() );
// from this point forward, we're going to consider this a fake histogram
rootColStats->setFakeHistogram();
return TRUE;
}
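// A sketch of the arithmetic above, under illustrative settings: with
// HIST_NO_STATS_UEC assumed to be 100 and MINOF(left, right) rowcount =
// 50,000, the T1.a = T1.b estimate is 50,000 * (1/100) = 500 rows, with
// the resulting UEC capped at MINOF(minUec, 500).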
// -----------------------------------------------------------------------
// ColStatDesc::mergeColStatDesc
//
// merge two ColStatDesc's
//
// forceMerge overrides logic that prevents equijoins of the form
// <col_1> = <col_1>
//
// Calls
//
// ColStats::mergeColStats
//
// to do the low-level merge work
// -----------------------------------------------------------------------
void
ColStatDesc::mergeColStatDesc (ColStatDescSharedPtr& mergedStatDesc,
MergeType mergeMethod,
NABoolean forceMerge,
OperatorTypeEnum exprOpCode,
NABoolean mergeFVs)
{
ColStatsSharedPtr rootColStats = getColStatsToModify();
ColStatsSharedPtr mergedColStats = mergedStatDesc->getColStats();
CostScalar minCard = csOne;
// before we start manipulating histograms for the join,
// let's collect the minimum cardinality, which will be used
// for the sanity check later. Do this only if the
// parent is a Join
if ((CmpCommon::getDefault(COMP_BOOL_45) == DF_ON ) &&
(exprOpCode == REL_JOIN) &&
(mergeMethod == INNER_JOIN_MERGE) &&
!rootColStats->isVirtualColForHist() &&
!mergedColStats->isVirtualColForHist() )
{
if (CmpCommon::getDefault(COMP_BOOL_46) == DF_OFF)
{
CostScalar leftFreq = rootColStats->getMaxFreq();
CostScalar rightFreq = mergedColStats->getMaxFreq();
minCard = MAXOF(leftFreq, rightFreq).minCsOne();
}
} // exprOpCode == REL_JOIN
if ( NOT ( mergeMethod == UNION_MERGE ||
mergeMethod == OR_MERGE ||
mergeMethod == LEFT_JOIN_OR_MERGE )
/* not really a join */
)
{
// --------------------------------------------------------------------
// We do the usual case first :
// merges that are *NOT* (the unusual) UNIONs, ORs, or LEFT_JOIN_ORs
// --------------------------------------------------------------------
// For join-related merges, add any/all entries in the nonVegEquals_
// ColStatDesc LIST of the mergedStatDesc to the nonVegEquals_ in
// THIS.
//
// Unfortunately, this can't be a simple set insert, because members
// of the set(s) are pointers, and we may have two different pointers
// pointing to the 'same' ColStatDesc.
// --------------------------------------------------------------------
for ( CollIndex j = 0; j < mergedStatDesc->nonVegEquals_.entries(); j++ )
{
ColStatDescSharedPtr tmpDescJ = mergedStatDesc->nonVegEquals_[j];
NABoolean foundFlag = FALSE;
for ( CollIndex i = 0; i < nonVegEquals_.entries(); i++ )
{
ColStatDescSharedPtr tmpDescI = nonVegEquals_[i];
if ( tmpDescI->VEGcolumn_ == tmpDescJ->VEGcolumn_ )
{
foundFlag = TRUE;
break ;
}
} // for i
if ( foundFlag == FALSE )
nonVegEquals_.insert( tmpDescJ );
} // for j
NABoolean skipJoin = FALSE;
// --------------------------------------------------------------------
// For any type of join-related merge, test for column statistics
// with identical merge states, and don't perform that join.
// --------------------------------------------------------------------
if ( mergedStatDesc->getMergeState() == mergeState_ &&
forceMerge == FALSE )
{
// if the root contains more rows than the to-be-merged version,
// overwrite the root with the other version.
// else do nothing: rootColStats is o.k.
if ( rootColStats->getRowcount() > mergedColStats->getRowcount() )
rootColStats->overwrite( *mergedColStats );
// update the applied predicates
appliedPreds().insert( mergedStatDesc->getAppliedPreds() );
skipJoin = TRUE;
} // forceMerge == FALSE, the two merge states are same
// Test for rootState being a subset of the mergedState.
else if ( mergedStatDesc->getMergeState().contains( mergeState() ) &&
forceMerge == FALSE )
{
// the to-be-merged colStats shows the effects of a merge with
// the rootColStats; overwrite the root copy
rootColStats->overwrite( *mergedColStats );
// update the mergeState_ of the root copy.
mergeState().clear();
mergeState().insert( mergedStatDesc->getMergeState() );
// and its applied predicates
appliedPreds().insert( mergedStatDesc->getAppliedPreds() );
skipJoin = TRUE;
} // root state is a subset of merged state
// left contains right -- join's already been done, and we're not forced
else if ( mergeState().contains( mergedStatDesc->getMergeState() ) &&
forceMerge == FALSE )
{
return ; // nothing left to merge
}
else // join on unique columns
if ( forceMerge == FALSE &&
( (mergeMethod == INNER_JOIN_MERGE) ||
(mergeMethod == SEMI_JOIN_MERGE) ) )
{
// Before starting with the checks, see if statistics exist
// for the columns being joined. Skip the following logic if:
// 1. no statistics exist for either of the columns
// 2. the join is being performed on histograms with a virtual column
if ((CmpCommon::getDefault(COMP_BOOL_48) == DF_ON) ||
(!rootColStats->isOrigFakeHist() &&
!mergedColStats->isOrigFakeHist() &&
!rootColStats->isVirtualColForHist() &&
!mergedColStats->isVirtualColForHist() ))
{
NABoolean scaleFreq = TRUE;
if (mergeMethod == SEMI_JOIN_MERGE)
scaleFreq = FALSE;
// merge the frequent values of the two sides if the number of values in the frequent value
// list for both histograms is less than the threshold value and, in the case of a tuple list,
// a frequent value list has been created
//
NABoolean mergeFreqValues = FALSE;
CostScalar uecCushion ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_4));
// check if it is a primary_key - foreign_key join. If yes,
// no merge is required
ValueIdSet colSetLeft = mergeState();
NABoolean leftJoinUnique = FALSE;
ValueIdSet colSetRight = (ValueIdSet)(mergedStatDesc->getMergeState());
NABoolean rightJoinUnique = FALSE;
NABoolean joinWithTupleList = FALSE;
// For semi_joins, only right side being unique matters
if ( (colSetLeft.entries() == 1) &&
(mergeMethod != SEMI_JOIN_MERGE) )
{
ValueId colIdLeft;
colSetLeft.getFirst(colIdLeft);
// Check to see if it is a join with a tuple list
// In this case the column type should be ITM_NATYPE
if ((CmpCommon::getDefault(COMP_BOOL_48) == DF_ON) &&
(colIdLeft.getItemExpr()->getOperatorType() == ITM_NATYPE) )
{
joinWithTupleList = TRUE;
// in case of a tuple list, do not do a regular merge of frequent values if
// there was no frequent value list created. That will be true if the
// number of elements in the IN list for which the tuple list was created
// was less than or equal to CQD HIST_TUPLE_FREQVAL_LIST_THRESHOLD.
// Please refer to method addColStatDescForVirtualCol for use of this CQD
if (rootColStats->getFrequentValues().entries() > 0)
mergeFreqValues = TRUE;
}
// join with a tuple list will always be unique as the
// UEC = rowcount
if (joinWithTupleList || rootColStats->isAlmostUnique())
leftJoinUnique = TRUE;
else
{
BaseColumn * colExprLeft = colIdLeft.castToBaseColumn();
if (colExprLeft != NULL)
{
TableDesc * tableDescForLeftCol = colExprLeft->getTableDesc();
leftJoinUnique = colSetLeft.doColumnsConstituteUniqueIndex(tableDescForLeftCol);
}
}
}
if (colSetRight.entries() == 1)
{
ValueId colIdRight;
colSetRight.getFirst(colIdRight);
// Check to see if it is a join with a tuple list
// In this case the column type should be ITM_NATYPE
if ((CmpCommon::getDefault(COMP_BOOL_48) == DF_ON) &&
(colIdRight.getItemExpr()->getOperatorType() == ITM_NATYPE) )
{
joinWithTupleList = TRUE;
// in case of a tuple list, do not do a regular merge of frequent values if
// there was no frequent value list created. That will be true if the
// number of elements in the IN list for which the tuple list was created
// was less than or equal to CQD HIST_TUPLE_FREQVAL_LIST_THRESHOLD.
// Please refer to method addColStatDescForVirtualCol for use of this CQD
if (mergedColStats->getFrequentValues().entries() > 0)
mergeFreqValues = TRUE;
}
// join with a tuple list will always be unique as the
// UEC = rowcount
if(joinWithTupleList || mergedColStats->isAlmostUnique() )
rightJoinUnique = TRUE;
else
{
BaseColumn * colExprRight = colIdRight.castToBaseColumn();
if (colExprRight != NULL)
{
TableDesc * tableDescForRightCol = colExprRight->getTableDesc();
rightJoinUnique = colSetRight.doColumnsConstituteUniqueIndex(tableDescForRightCol);
}
}
}
CostScalar leftUec;
CostScalar rightUec;
// Fix for Sol:10-070222-2759. The join cardinalities were highly
// underestimated. This is because of the assumption the optimizer makes
// regarding the relationship between the joining columns. The joining
// columns can be either orthogonal or contained within each other.
// Containment determines whether the joining column of one table
// should take into account the reduction on the column from the other side.
// The way the reduction is computed is controlled by a CQD.
// By default we assume that the columns are orthogonal; that is, a reduction
// on one side of a table should not impact the other side.
// The CQD used for this is HIST_ASSUME_INDEPENDENT_REDUCTION,
// and it is ON by default
if (CURRSTMT_OPTDEFAULTS->histAssumeIndependentReduction())
{
leftUec = rootColStats->getBaseUec();
rightUec = mergedColStats->getBaseUec();
UInt32 upliftCardCond = CURRSTMT_OPTDEFAULTS->histOptimisticCardOpt();
if ((CmpCommon::getDefault(COMP_BOOL_45) == DF_ON) &&
( (upliftCardCond == 1) ||
(upliftCardCond == 2) ) )
{
ValueIdSet joinedCols = this->getColumn();
joinedCols.insert(mergedStatDesc->getColumn());
// first get the joining column with minimum UEC
// CostScalar minOriginalUec = joinedCols.getMinOrigUecOfJoiningCols();
CostScalar minOriginalUec = MINOF(leftUec, rightUec);
// next get the UEC of the left and the right joining columns
CostScalar leftTotalUec = rootColStats->getTotalUec();
CostScalar rightTotalUec = mergedColStats->getTotalUec();
// max of left and right UEC to compute join cardinality
// uses single interval concept and the containment assumption
//leftUec = MAXOF(minOriginalUec, leftTotalUec);
//rightUec = MAXOF(minOriginalUec, rightTotalUec);
leftUec = MAXOF(leftTotalUec, minOriginalUec);
rightUec = MAXOF(rightTotalUec, minOriginalUec);
} // upliftCardCond = 1 OR 2
} // histAssumeIndependentReduction
else
{
leftUec = rootColStats->getTotalUec();
rightUec = mergedColStats->getTotalUec();
}
if (leftJoinUnique && (joinWithTupleList ||
((rootColStats->getBaseUec() * uecCushion) > mergedColStats->getBaseUec() ) ) )
{
// result is the otherColStats
CostScalar reduction = csOne;
if (joinWithTupleList)
reduction = 1 / MAXOF(rightUec, leftUec).getValue();
else
reduction = 1/leftUec.value();
CostScalar minUec = MINOF(rootColStats->getTotalUec(), mergedColStats->getTotalUec()) ;
// make a copy of the original rootColStats so we can merge frequent values properly later.
// This will be required if the mergeFreqValues flag is TRUE, i.e. we need to do a detailed
// merge of frequent values
ColStatsSharedPtr rootColStatsCopy;
if (mergeFreqValues)
rootColStatsCopy = ColStats::deepCopy(*(rootColStats),HISTHEAP);
// Overwrite the rootColStats with the right histogram, which is the resultant
// histogram now
rootColStats->overwrite( *mergedColStats );
// The overwrite function does not copy the frequent value list. Tried to make a copy,
// but that resulted in cardinality changes for outer joins. Till we
// figure out how to handle frequent values for outer joins, we copy frequent
// values separately, so that the behaviour of left joins does not change.
// Also, we did not want to change the behavior of the short-cut join for joins on columns,
// hence we do that only for tuple lists.
if (joinWithTupleList &&
(rootColStats->getFrequentValues().entries() == 0) &&
(mergedColStats->getFrequentValues().entries() > 0))
{
FrequentValueList * resultantFreqValList = new (STMTHEAP)
FrequentValueList(mergedColStats->getFrequentValues(), STMTHEAP);
rootColStats->setFrequentValue(*resultantFreqValList);
}
CostScalar uecReduction = minUec / rootColStats->getTotalUec();
// do a detailed merge of joins, if the tuple list has frequent values attached to it
// else simply scale the frequency and the probability of the frequent value list
// by the row and uec reduction computed
if (mergeFreqValues)
// rootColStats is now the rightColStats, hence merge frequent values of the right and
// the original left root colstats
NABoolean dummy = rootColStats->mergeFrequentValues(rootColStatsCopy, scaleFreq);
else
{
FrequentValueList & rootFrequentValueList = rootColStats->getModifableFrequentValues();
rootFrequentValueList.scaleFreqAndProbOfFrequentValues(reduction, uecReduction);
}
// later scale the histogram keeping the frequent values unchanged, as they have already been
// scaled
rootColStats->scaleHistogram(reduction, uecReduction, FALSE);
// update the applied predicates
appliedPreds().insert( mergedStatDesc->getAppliedPreds() );
mergeState().insert(mergedStatDesc->getMergeState() );
rootColStats->setRecentJoin(TRUE);
rootColStats->setModified (TRUE) ;
skipJoin = TRUE;
}
else
{
if (rightJoinUnique && (joinWithTupleList ||
(( mergedColStats->getBaseUec() * uecCushion) > rootColStats->getBaseUec() ) ))
{
// final result is this colStats, hence not much to do
// except scale this colstats to take care of cross product
// that had taken place earlier
// For semi-joins, the left and the right histograms are not scaled
// at the time of the merge; hence, no scaling is required here either
CostScalar reduction = csOne;
// if it is a join with a tuple list, the reduction is the reciprocal
// of the larger of the two UECs
if (joinWithTupleList)
reduction = 1 / MAXOF(rightUec, leftUec).getValue();
else
{
if (mergeMethod != SEMI_JOIN_MERGE)
reduction = 1/rightUec.value();
else
reduction = mergedColStats->getTotalUec()/mergedColStats->getBaseUec().value();
}
CostScalar minUec = MINOF(rootColStats->getTotalUec(), mergedColStats->getTotalUec()) ;
CostScalar uecReduction = minUec / rootColStats->getTotalUec();
// merge the frequent values of the two sides if the heuristic used above is TRUE else
// just scale the probability of the frequent values, keeping frequency unchanged
// This is similar to applying direct reduction to histogram intervals
if (mergeFreqValues)
NABoolean dummy = rootColStats->mergeFrequentValues(mergedColStats, scaleFreq);
else
{
FrequentValueList & rootFrequentValueList = rootColStats->getModifableFrequentValues();
rootFrequentValueList.scaleFreqAndProbOfFrequentValues(reduction, uecReduction);
}
// later scale the histogram keeping the frequent values unchanged, as they have already been
// scaled
rootColStats->scaleHistogram(reduction, uecReduction, FALSE);
appliedPreds().insert( mergedStatDesc->getAppliedPreds() );
mergeState().insert(mergedStatDesc->getMergeState() );
rootColStats->setRecentJoin(TRUE);
rootColStats->setModified (TRUE) ;
skipJoin = TRUE;
} // rightJoinUnique
} // end else leftJoinUnique
} // histogram is !fake and !histForVirtualCol
} // forceMerge = FALSE && mergeMethod == INNER_JOIN or SEMI_JOIN
if (forceMerge == TRUE || skipJoin == FALSE)
{
// ----------------------------------------------------------------
// merge the two column statistics
// ----------------------------------------------------------------
NABoolean isNumeric = FALSE; // indicates whether the datatype is numeric
if (CURRSTMT_OPTDEFAULTS->reduceIntermediateHistograms())
{
  if (getVEGColumn())
  {
    isNumeric = getVEGColumn().getType().isNumeric();
  }
  else if (getColumn())
  {
    isNumeric = getColumn().getType().isNumeric();
  }
}
rootColStats->mergeColStats( mergedColStats, mergeMethod, isNumeric,
exprOpCode, mergeFVs);
// update the mergeState_ of the root copy.
mergeState().insert( mergedStatDesc->getMergeState() );
// and its applied predicates
appliedPreds().insert( mergedStatDesc->getAppliedPreds() );
} // if (forceMerge == TRUE || skipJoin == FALSE)
} // not UNION or left join
else // It's a UNION, OR or LEFT_JOIN_OR.
{
// --------------------------------------------------------------------
// We do the unusual case second :
// merges that are UNIONs, ORs, or LEFT_JOIN_ORs
// (i.e., "non-join" merges)
// --------------------------------------------------------------------
// For non-join merges, an entry should remain in the nonVegEquals_
// ColStatDesc list of the result only if it appears in both of the
// nonVegEquals_ involved in the merge.
// --------------------------------------------------------------------
CollIndex j = 0;
while ( j < nonVegEquals_.entries() )
{
ColStatDescSharedPtr tmpDescJ = nonVegEquals_[j];
NABoolean foundFlag = FALSE;
for ( CollIndex i = 0;
i < mergedStatDesc->nonVegEquals_.entries() && !foundFlag;
i++ )
{
ColStatDescSharedPtr tmpDescI = mergedStatDesc->nonVegEquals_[i];
if ( tmpDescI->VEGcolumn_ == tmpDescJ->VEGcolumn_ )
foundFlag = TRUE;
} // for i
if ( foundFlag == FALSE )
{
nonVegEquals_.removeAt( j );
}
else
{
j++;
}
} // while j
// ----------------------------------------------------------------
// merge the two column statistics
// ----------------------------------------------------------------
NABoolean isNumeric = FALSE; // indicates whether the datatype is numeric
if (CURRSTMT_OPTDEFAULTS->reduceIntermediateHistograms())
{
  if (getVEGColumn())
  {
    isNumeric = getVEGColumn().getType().isNumeric();
  }
  else if (getColumn())
  {
    isNumeric = getColumn().getType().isNumeric();
  }
}
rootColStats->mergeColStats( mergedColStats, mergeMethod, isNumeric,
exprOpCode, mergeFVs);
// And update the mergeState_ and applied predicates of the root copy.
if ( mergeMethod == UNION_MERGE ||
mergeMethod == OR_MERGE )
{
// -------------------------------------------------------------
// An OR's or a UNION's result, should only indicate that a
// predicate has been applied when it has been applied to both
// sides of the OR or the UNION.
//
// This restriction impacts both the result's mergeState_ and
// its Applied Predicate set.
// -------------------------------------------------------------
mergeState().intersectSet( mergedStatDesc->getMergeState() );
appliedPreds().intersectSet( mergedStatDesc->getAppliedPreds() );
}
else
{
mergeState().insert( mergedStatDesc->getMergeState() );
appliedPreds().insert( mergedStatDesc->getAppliedPreds() );
}
}
if ((exprOpCode == REL_JOIN) && (rootColStats->getRowcount() < minCard) )
rootColStats->setRowsAndUec(minCard, rootColStats->getTotalUec());
} // ColStatDesc::mergeColStatDesc
void
ColStatDescList::setScaleFactor(CostScalar val)
{
if (CmpCommon::getDefault(COMP_BOOL_42) == DF_OFF)
return;
for ( CollIndex i = 0; i < entries(); i++ ) {
(*this)[i]->getColStatsToModify()->setScaleFactor(val);
}
}
// ------------------------------------------------------------------------
// ColStatDesc::derivOfLikeAndSimilarPredApp
//
// This method is used in modifyStats (during applyPred) and
// applyDefaultPred.
//
// It is used for range predicates derived from LIKE predicate. This method
// returns TRUE if the range predicate is a derivative of LIKE predicate
// and the first range predicate derived from LIKE has already been
// applied to this column's histogram
// ------------------------------------------------------------------------
NABoolean
ColStatDesc::derivOfLikeAndSimilarPredApp(const ItemExpr * predApplied )
{
// get the operator for the predicate
OperatorTypeEnum op = predApplied->getOperatorType();
// check to see if this is a derivative of LIKE predicate
NABoolean predDerivOfLike = FALSE;
if ( (op == ITM_GREATER_EQ) OR
(op == ITM_GREATER) OR
(op == ITM_LESS) OR
(op == ITM_LESS_EQ))
{
BiRelat *br = (BiRelat *) predApplied;
predDerivOfLike = br->derivativeOfLike();
if (! predDerivOfLike)
return FALSE;
}
for ( ValueId id = appliedPreds_.init();
appliedPreds_.next( id );
appliedPreds_.advance( id ) )
{
const ItemExpr *pred = id.getItemExpr();
OperatorTypeEnum appliedOp = pred->getOperatorType();
if ( (appliedOp == ITM_GREATER_EQ) OR
(appliedOp == ITM_GREATER) OR
(appliedOp == ITM_LESS) OR
(appliedOp == ITM_LESS_EQ) )
{
BiRelat *br = (BiRelat *) pred;
if (predDerivOfLike && br->derivativeOfLike())
{
getColStatsToModify()->setFakeHistogram();
return TRUE;
}
}
}
return FALSE;
}
// ------------------------------------------------------------------------
// ColStatDesc::isSimilarPredicateApplied
//
// Used to avoid redundant predicate application against 'fake' ColStats,
// this routine determines whether or not THIS's appliedPreds_ contains a
// predicate 'similar' to its input OperatorTypeEnum op.
//
// Currently, the only non-identical 'similar' predicates are the >, >=
// and <, <= predicate pairs.
// ------------------------------------------------------------------------
NABoolean
ColStatDesc::isSimilarPredicateApplied (const OperatorTypeEnum op) const
{
for ( ValueId id = appliedPreds_.init();
appliedPreds_.next( id );
appliedPreds_.advance( id ) )
{
const ItemExpr *pred = id.getItemExpr();
OperatorTypeEnum appliedOp = pred->getOperatorType();
switch ( op )
{
case ITM_LESS:
case ITM_LESS_EQ:
if ( appliedOp == ITM_LESS || appliedOp == ITM_LESS_EQ )
return TRUE;
else
break ;
case ITM_GREATER:
case ITM_GREATER_EQ:
if ( appliedOp == ITM_GREATER || appliedOp == ITM_GREATER_EQ )
return TRUE;
else
break;
default:
if ( appliedOp == op )
return TRUE;
}
}
return FALSE;
}
// ------------------------------------------------------------------------
// ColStatDesc::selForRelativeRange
//
// this routine determines whether or not THIS's appliedPreds_ contains a
// predicate opposite in range to its input OperatorTypeEnum op.
//
// Currently, the only opposite range predicates are the >, >=
// and <, <= predicate pairs.
//
// This returns the value of the constant (adjusted for >= and <=) over
// high - low + 1
//
// A return of 1 indicates that this routine could not calculate the selectivity
// ------------------------------------------------------------------------
CostScalar
ColStatDesc::selForRelativeRange (const OperatorTypeEnum op,
const ValueId & column,
ItemExpr *newPred) const
{
CostScalar sel = csOne;
OperatorTypeEnum appliedOp = NO_OPERATOR_TYPE;
ItemExpr *savePred = NULL;
// Return the selectivity as csOne if it is a fake histogram. Then the
// default selectivity will be applied.
if ( getColStats()->isFakeHistogram() )
return csOne;
// This algorithm only works for numeric and datetime types.
if ( NOT (( column.getType().getTypeQualifier() == NA_NUMERIC_TYPE ) ||
( column.getType().getTypeQualifier() == NA_DATETIME_TYPE)))
return csOne;
// Go through all the predicates we have already applied, looking for
// the second part of a between type operation.
// One of the predicates must be (>, or >=) and the other (<, or <=)
for ( ValueId id = appliedPreds_.init();
appliedPreds_.next (id);
appliedPreds_.advance (id) )
{
ItemExpr *pred = id.getItemExpr();
appliedOp = pred->getOperatorType();
switch (op)
{
case ITM_LESS:
case ITM_LESS_EQ:
  if ( appliedOp == ITM_GREATER || appliedOp == ITM_GREATER_EQ )
    savePred = pred;
  break;
case ITM_GREATER:
case ITM_GREATER_EQ:
  if ( appliedOp == ITM_LESS || appliedOp == ITM_LESS_EQ )
    savePred = pred;
  break;
default:
  break;
}
}
if ( savePred ) // if we have found a between type of operation - do more
{
// look for common host variables and constant with +/- with a constant
ItemExpr * saveChildPred = savePred->child(1);
//Use the actual node hiding behind the cast or the notCovered node.
if ((saveChildPred->getOperatorType() == ITM_CAST) ||
(saveChildPred->getOperatorType() == ITM_NOTCOVERED) )
saveChildPred = saveChildPred->child(0);
CollIndex newPredArity = newPred->getArity();
CollIndex savePredArity = saveChildPred->getArity();
OperatorTypeEnum newOp = newPred->getOperatorType();
OperatorTypeEnum saveOp = saveChildPred->getOperatorType();
// We need to have one predicate compare the column to a stand alone
// host variable and the other predicate to compare the column to a
// host variable +/- a constant.
if( NOT ( ( newPredArity == 0
AND ( saveOp == ITM_PLUS OR saveOp == ITM_MINUS )
)
OR ( savePredArity == 0
AND ( newOp == ITM_PLUS OR newOp == ITM_MINUS )
)
)
)
{
return csOne;
}
// Now normalize to the two predicate side and the one predicate side
ItemExpr * onePred;
ItemExpr * twoPred;
ItemExpr * hostVar = NULL;
ConstValue * constant = NULL;
double cn;
OperatorTypeEnum twoOp;
OperatorTypeEnum origTwoOp;
NABoolean def = FALSE;
if ( newPredArity == 0 )
{
onePred = newPred;
twoPred = saveChildPred;
twoOp = saveOp;
origTwoOp = appliedOp;
}
else
{
onePred = saveChildPred;
twoPred = newPred;
twoOp = newOp;
origTwoOp = op;
}
// Identify which is the constant and which the host variable
// If this is a more complicated expression, (has more than 1 operator)
// we will not find either the constant or host variable.
for ( Int32 arity = 0; arity < twoPred->getArity(); arity++ )
{
ItemExpr * operand = (*twoPred)[arity].getPtr();
//Use the actual node hiding behind the cast / not covered node.
if ( (operand->getOperatorType() == ITM_CAST) ||
(operand->getOperatorType() == ITM_NOTCOVERED) )
operand = operand->child(0);
//Check for all operators types that stay constant
//for a particular execution of a statement
if ( ( operand->getOperatorType() == ITM_HOSTVAR )
OR ( operand->getOperatorType() == ITM_DYN_PARAM )
OR (operand->getOperatorType() == ITM_CACHE_PARAM)
OR (operand->getOperatorType() == ITM_CURRENT_USER)
OR (operand->getOperatorType() == ITM_CURRENT_TIMESTAMP)
OR (operand->getOperatorType() == ITM_UNIQUE_ID)
OR (operand->getOperatorType() == ITM_UNIX_TIMESTAMP)
OR (operand->getOperatorType() == ITM_SESSION_USER)
OR (operand->getOperatorType() == ITM_GET_TRIGGERS_STATUS)
OR (operand->getOperatorType() == ITM_UNIQUE_EXECUTE_ID))
hostVar = operand;
// changed from:
/*
if ( operand->getOperatorType() == ITM_CONSTANT )
constant = operand->castToConstValue( def );
*/
// changed to the code below.
// The two are almost equivalent: castToConstValue is a virtual method
// on ItemExpr and the classes derived from it, and the base
// implementation always returns NULL.
// The commented-out lines above can only cover constant literals,
// e.g. 2, but the following lines also cover simple constant
// expressions like 1+3 or 2-2, because castToConstValue is
// overloaded for BiArithmetic expressions.
// A non-null return value from castToConstValue indicates a constant
// or a simple constant expression, and therefore we use the return
// value from castToConstValue to check for constants.
ConstValue * isAConstant = operand->castToConstValue(def);
if(isAConstant)
constant = isAConstant;
}
// COLUMN <op> constant predicate?
// if so, does column match the leading prefix of histogram?
if ( constant != NULL )
{
// get the encoded format for the constant;
// we then work with its double value, cn.
EncodedValue encodedConst = EncodedValue( constant, def );
cn = encodedConst.getDblValue();
// we want to convert an Interval from its present
// type into seconds.
const NAType * constantType = constant->getType();
if (constantType->getTypeQualifier() == NA_INTERVAL_TYPE)
cn = ((SQLInterval *)constantType)->getValueInSeconds(cn);
}
else
return csOne;
if ( onePred != hostVar) // if both predicates don't refer to same hv.
return csOne;
// If the range is impossible - we have zero selectivity
if ( ( twoOp == ITM_PLUS
AND ( origTwoOp== ITM_GREATER_EQ OR origTwoOp == ITM_GREATER )
)
OR ( twoOp == ITM_MINUS
AND ( origTwoOp == ITM_LESS_EQ OR origTwoOp == ITM_LESS )
)
)
return csZero;
EncodedValue minVal = getColStats()->getMinValue();
EncodedValue maxVal = getColStats()->getMaxValue();
double hi = maxVal.getDblValue();
double lo = minVal.getDblValue();
// calculate the multiplier we will use for scaling some constants we want to add
// to the hi/lo/cn.
double scaleMult = 1;
Lng32 scale = column.getType().getScale();
// iterate |scale| times: divide for positive scale, multiply for negative
Lng32 absScale = ( scale >= 0 ) ? scale : -scale;
for ( Lng32 i = 0; i < absScale; i++ )
{
if ( scale > 0 )
scaleMult = scaleMult / 10.0;
else
scaleMult = scaleMult * 10.0;
}
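// For illustration (hypothetical column): for a NUMERIC(9,2) column,
// scale = 2 and scaleMult = 1/100 = 0.01, the gap between two adjacent
// representable values; for an integer column (scale = 0) scaleMult
// stays 1.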
// Since the ratio function always calculates (cn-lo)/(hi-lo),
// it doesn't add one to hi-lo to get the correct range.
// Ensure that the calculation is accurate for small ranges - avoiding overflow.
if ( lo > 0 )
lo = lo - scaleMult;
else if ( hi < ( scaleMult * 1000000 ) )
hi = hi + scaleMult;
minVal = EncodedValue( lo );
maxVal = EncodedValue( hi );
// Adjust the constant value for >= and <=, each of which adds 1 to the range
double rangeInc = -1; //(-1) since we will add lo+cn, subtract out extra value.
if ( op == ITM_GREATER_EQ || op == ITM_LESS_EQ )
rangeInc += 1;
if ( appliedOp == ITM_GREATER_EQ || appliedOp == ITM_LESS_EQ )
rangeInc += 1;
rangeInc = rangeInc * scaleMult;
if ( lo + cn + rangeInc > hi )
return csOne;
// the ratio function uses (val-lo) to get the numerator
// and (hi-lo) for the denominator while avoiding overflow
EncodedValue testVal = EncodedValue( lo + cn + rangeInc );
sel = testVal.ratio( minVal, maxVal );
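// Worked example (hypothetical values): for col >= ? and col <= ?+9 on
// an integer column with lo = 0 and hi = 99 (scaleMult = 1, and hi was
// bumped to 100 above): rangeInc = -1 + 1 + 1 = 1, so
// testVal = 0 + 9 + 1 = 10 and sel = (10 - 0) / (100 - 0) = 0.1,
// i.e. a window of 10 values out of 100 possible values.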
if ( sel.getValue() < 0.0 || sel.getValue() > csOne.getValue() )
return csOne;
CostScalar defaultSel =
CURRSTMT_OPTDEFAULTS->defSelForRangePred();
// For window predicates like col between ? and ?+const
// (which gets converted to col >= ? and col <= ?+const),
// we come to this point for the second predicate, i.e. for col <= ?+const.
// The selectivity for the first predicate, col >= ?, has already been
// applied, and it is the default selectivity.
// So here, make sure the selectivity will not go beyond defaultSel.
// The caller of this method (applyDefaultPred) will call applySel to apply
// the selectivity, which effectively multiplies the selectivities of the
// predicates. In this particular case, since it is a window predicate,
// the selectivity should be just the selectivity of the window we
// calculated above, so we should not multiply the selectivities.
// Since the multiplication happens unconditionally in the caller, divide
// by the defaultSel here to compensate.
// If the selectivity is equal to or above defaultSel, return 0.999999
// (if we returned 1 (csOne), the histogram would be considered a fake
// histogram and the defaultSel would be applied again).
if (sel >= defaultSel)
return 0.999999;
else
sel = sel / defaultSel;
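// Worked example (hypothetical values): suppose defaultSel = 1/3 and
// the window selectivity computed above is 0.1. The first predicate
// (col >= ?) already contributed defaultSel = 1/3, and here we return
// 0.1 / (1/3) = 0.3, so the product applied by the caller is
// (1/3) * 0.3 = 0.1 -- exactly the window selectivity.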
// Sanity check to make sure the cardinality did not go below average rowcount.
CostScalar selForAvgRowcount = 1/getColStats()->getUecBeforePreds().getValue();
if(sel < selForAvgRowcount)
sel = selForAvgRowcount;
}
return sel;
}
void
ColStatDesc::print (FILE *ofd,
const char * prefix,
const char * suffix,
CollHeap *c, char *buf,
NABoolean hideDetail) const
{
Space * space = (Space *)c;
char mybuf[1000];
if (!hideDetail)
{
snprintf(mybuf, sizeof(mybuf), "%scolumn:", prefix);
PRINTIT(ofd, c, space, buf, mybuf);
ValueIdList columns;
columns.insert(column_);
columns.print(ofd, prefix, suffix, c, buf);
}
snprintf(mybuf, sizeof(mybuf), "%sVEGcolumn:", prefix) ;
PRINTIT(ofd, c, space, buf, mybuf);
ValueIdList VEGcolumns;
VEGcolumns.insert(VEGcolumn_);
VEGcolumns.print(ofd, prefix, suffix, c, buf) ;
snprintf(mybuf, sizeof(mybuf), "%sMerge state:", prefix) ;
PRINTIT(ofd, c, space, buf, mybuf);
mergeState_.print(ofd, prefix, suffix, c, buf) ;
snprintf(mybuf, sizeof(mybuf), "%sApplied preds:", prefix) ;
PRINTIT(ofd, c, space, buf, mybuf);
appliedPreds_.print(ofd, prefix, suffix, c, buf) ;
if (getColStats() == NULL)
{
snprintf(mybuf, sizeof(mybuf), "NULL colStats_!\n");
PRINTIT(ofd, c, space, buf, mybuf);
}
else
{
getColStats()->print(ofd,prefix,suffix, c, buf, hideDetail);
}
}
void
ColStatDesc::display() const
{
print();
}
// ------------------------------------------------------------------------
// Methods for ColStatDescList Class
// ------------------------------------------------------------------------
// some users of the CSDL class want to be able to destroy the temporary
// objects they create
void ColStatDescList::destroy()
{
while ( entries() > 0 )
removeDeepCopyAt( 0 );
}
// Returns TRUE if a ColStatDesc for the given column is contained in
// this ColStatDescList. The column's ColStatDesc could also have been
// merged with another ColStatDesc.
NABoolean
ColStatDescList::contains(const ValueId & column) const
{
for ( CollIndex i = 0; i < entries(); i++ )
{
// The "merge state" of a ColStatDesc indicates all the
// columns that have been merged into this ColStatDesc.
// Initially, the merge state consists of the original
// base table column, therefore, this will work even for
// ColStatDesc's that have not been merged
const ValueIdSet & msSet = (*this)[i]->getMergeState();
if ( msSet.contains( column ) )
return TRUE;
}
return FALSE;
}
// Returns TRUE if this ColStatDescList contains column statistics
// (possibly merged) for every column in the given list.
NABoolean
ColStatDescList::contains(const ValueIdList & colList) const
{
ColStatsSharedPtr colStats;
for(CollIndex i =0; i<colList.entries(); i++)
{
colStats = this->getColStatsPtrForColumn(colList[i]);
// get the histogram pointer for the column in list
if (colStats == NULL)
{
// if the histogram is missing for this column
return FALSE;
}
}
return TRUE;
}
NABoolean
ColStatDescList::containsAtLeastOneFake() const
{
// -----------------------------------------------------------------------
// Go through the list of ColStatDescs; if a fake ColStats is found,
// set thereIsAFake to TRUE and stop looking:
// -----------------------------------------------------------------------
NABoolean thereIsAFake = FALSE;
for ( CollIndex i = 0; i < entries(); i++ )
{
if ( (*this)[i]->getColStats()->isFakeHistogram() )
{
thereIsAFake = TRUE;
break;
}
}
return thereIsAFake;
} // ColStatDescList::containsAtLeastOneFake() const
NABoolean
ColStatDescList::selectivityHintApplied() const
{
for ( CollIndex i = 0; i < entries(); i++ )
{
if ( (*this)[i]->getColStats()->isSelectivitySetUsingHint() )
return TRUE;
}
return FALSE;
}
// ---------------------------------------------------------------------
// Methods for doing Deep Copies of the ColStatDescs whose pointers are
// inserted into a ColStatDescList.
// In the various routines,
// 'firstN' specifies that only the first N entries in the source are to
// be inserted.
// 'scale' specifies the factor by which the RowCounts (not UECs) should
// be multiplied.
// 'shapeChangedMask' is AND'd with the current setting of the shape-
// changed flag, allowing it to be either left alone (the default) or
// cleared.
// ---------------------------------------------------------------------
// Macro to make a CSD copy and set it up correctly.
#define ALLOCATE_COL_STAT_DESC_AND_SET_IT_UP( x ) \
ColStatDescSharedPtr tmpColStatDescPtr(new (HISTHEAP) ColStatDesc( x ), HISTHEAP); \
ColStatsSharedPtr tmpColStatsPtr = tmpColStatDescPtr->getColStatsToModify(); \
tmpColStatsPtr->copyAndScaleHistogram( scale ); \
tmpColStatsPtr->setShapeChanged( \
shapeChangedMask && tmpColStatsPtr->isShapeChanged() );
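// For illustration: with shapeChangedMask == TRUE (the default) the
// copy keeps the source's shape-changed flag as-is; with FALSE the
// flag is cleared on the copy.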
void
ColStatDescList::makeDeepCopy (const ColStatDescList & source,
const CostScalar & scale,
const NABoolean shapeChangedMask)
{
for ( CollIndex i = 0; i < source.entries(); i++ )
{
ALLOCATE_COL_STAT_DESC_AND_SET_IT_UP( *source[i] );
insertAt( i, tmpColStatDescPtr );
}
setUecList( source.getUecList() );
} // makeDeepCopy
void
ColStatDescList::appendDeepCopy (const ColStatDescList & source,
const CollIndex firstN,
const CostScalar & scale,
const NABoolean shapeChangedMask)
{
CollIndex thisEntries = entries();
for ( CollIndex i = 0; i < source.entries() && i < firstN; i++ )
{
ALLOCATE_COL_STAT_DESC_AND_SET_IT_UP( *source[i] );
insertAt( thisEntries, tmpColStatDescPtr );
thisEntries++;
}
// add the other CSDL's uec list to this one
insertIntoUecList( source.getUecList() );
} // appendDeepCopy
void
ColStatDescList::prependDeepCopy (const ColStatDescList & source,
const CollIndex firstN,
const CostScalar & scale,
const NABoolean shapeChangedMask)
{
for ( CollIndex i = 0; i < source.entries() && i < firstN; i++ )
{
ALLOCATE_COL_STAT_DESC_AND_SET_IT_UP( *source[i] );
insertAt( i, tmpColStatDescPtr );
}
// add the other CSDL's uec list to this one
insertIntoUecList( source.getUecList() );
} // prependDeepCopy
void
ColStatDescList::insertDeepCopy (const ColStatDescSharedPtr& source,
const CostScalar & scale,
const NABoolean shapeChangedMask)
{
ALLOCATE_COL_STAT_DESC_AND_SET_IT_UP( *source );
insert( tmpColStatDescPtr );
} // insertDeepCopy
void
ColStatDescList::insertDeepCopyAt (const CollIndex entry,
const ColStatDescSharedPtr& source,
const CostScalar & scale,
const NABoolean shapeChangedMask)
{
ALLOCATE_COL_STAT_DESC_AND_SET_IT_UP( *source );
insertAt( entry, tmpColStatDescPtr );
} // insertDeepCopyAt
void
ColStatDescList::makeMappedDeepCopy (const ColStatDescList & source,
ValueIdMap &map,
NABoolean includeUnmappedColumns)
{
// similar to makeDeepCopy, but this method maps all the ValueIds in
// the source, using the provided map, in the "up" direction
for ( CollIndex i = 0; i < source.entries(); i++ )
{
ValueId topVid;
ValueId bottomVid(source[i]->getVEGColumn());
map.mapValueIdUp(topVid, bottomVid);
if (includeUnmappedColumns || topVid != bottomVid)
{
ColStatDescSharedPtr tmpColStatDescPtr(
new (HISTHEAP) ColStatDesc(), HISTHEAP);
tmpColStatDescPtr->mapUpAndCopy(*source[i], map);
ColStatsSharedPtr tmpColStatsPtr = tmpColStatDescPtr->getColStatsToModify();
tmpColStatsPtr->copyAndScaleHistogram(1.0);
tmpColStatsPtr->setShapeChanged(tmpColStatsPtr->isShapeChanged());
insert(tmpColStatDescPtr);
}
}
// map the multi-column UECs
MultiColumnUecList * mappedMultiColUECs = new (CmpCommon::statementHeap())
MultiColumnUecList();
mappedMultiColUECs->insertMappedList(source.getUecList(), map);
setUecList(mappedMultiColUECs);
} // makeMappedDeepCopy
void
ColStatDescList::removeDeepCopyAt (const CollIndex entry)
{
removeAt( entry );
} // removeDeepCopyAt
// -------------------------------------------------------------------
// copyAndScaleHistogram
// This method goes thru each colStatDesc in the list and scales the
// underlying histogram based on the scale and the row reduction factor
// and uec reduction factors specified in the colStat header.
// --------------------------------------------------------------------
void
ColStatDescList::copyAndScaleHistograms(CostScalar scale)
{
for (CollIndex i = 0; i < entries(); i++)
{
ColStatDescSharedPtr columnStatDesc = (*this)[i];
ColStatsSharedPtr columnStats = columnStatDesc->getColStatsToModify();
columnStats->scaleHistogram(scale);
columnStats->computeMaxFreqOfCol(!(CmpCommon::getDefault(COMP_BOOL_42)));
}
}
void
ColStatDescList::setBaseUecToTotalUec()
{
for (CollIndex i = 0; i < entries(); i++)
{
ColStatDescSharedPtr columnStatDesc = (*this)[i];
ColStatsSharedPtr columnStats = columnStatDesc->getColStatsToModify();
columnStats->setBaseUec(columnStats->getTotalUec());
}
}
void
ColStatDescList::computeMaxFreq(NABoolean forced)
{
for (CollIndex i = 0; i < entries(); i++)
{
ColStatDescSharedPtr columnStatDesc = (*this)[i];
ColStatsSharedPtr columnStats = columnStatDesc->getColStatsToModify();
columnStats->computeMaxFreqOfCol(forced);
}
}
// Add a colStatDesc for a constant to this colStatDescList.
// This is used for cases like inserts, transpose or rowsets.
// Since there are no columns associated with these histograms
// they are treated in a special manner. They are distinguished
// from regular histograms by means of a flag, virtualColForHist_.
void ColStatDescList::addColStatDescForVirtualCol(const CostScalar & uec,
const CostScalar & rowCount,
const ValueId colId,
const ValueId vegCol,
const ValueId mergeState,
const RelExpr * expr,
NABoolean defineVirtual)
{
// create a frequent value list
FrequentValueList * frequentValueList = new (STMTHEAP) FrequentValueList(STMTHEAP);
NABoolean fakeHist = TRUE;
EncodedValue minValue = UNINIT_ENCODEDVALUE;
EncodedValue maxValue = UNINIT_ENCODEDVALUE;
// If the histogram is being created for a tuple expr, create a frequent
// value list from the tuple expressions.
if ((defineVirtual == FALSE) &&
expr &&
(expr->getOperatorType() == REL_TUPLE_LIST) )
{
fakeHist = FALSE;
ItemExprList tList(((Tuple *) expr)->tupleExprTree(), STMTHEAP);
if (tList.entries() <= CURRSTMT_OPTDEFAULTS->histTupleFreqValListThreshold())
{
for (CollIndex i = 0; (i < (CollIndex) tList.entries());i++)
{
ItemExpr * tupleVal =
((ItemExpr *) tList[i])->castToItemExpr();
if (tupleVal == NULL)
continue;
ValueIdSet leafs;
tupleVal->getLeafValueIds(leafs);
if (leafs.entries() == 0)
continue;
for (ValueId val = leafs.init();
leafs.next(val);
leafs.advance(val))
{
if (val == NULL_VALUE_ID)
continue;
ItemExpr * leafVal = val.getItemExpr();
if (leafVal->getOperatorType() == ITM_CONSTANT)
{
// add the constant to the frequent value list
NABoolean neg = FALSE;
ConstValue * cv = leafVal->castToConstValue(neg);
if (cv == NULL) continue;
EncodedValue ev(cv, neg);
UInt32 hashValue = 0;
if (cv->getType()->useHashRepresentation() &&
cv->getType()->useHashInFrequentValue())
hashValue = cv->computeHashValue(*(cv->getType()));
FrequentValue newV(hashValue, csOne, csOne, ev);
frequentValueList->insertFrequentValue(newV);
} // if leafVal is a constant
} // for all leaf values.
} // for all elements in the IN list
} // if number of elements in IN list is less than the threshold
} // if the histogram is being created for tuple_list
// finally get the min and the max values from the frequent value list;
// these will be used to set the min and the max values of the histograms.
// if frequentValueList is empty, maxValue = number of values in the tuple list
if (frequentValueList->entries() > 0)
{
minValue = (*frequentValueList)[0].getEncodedValue();
maxValue = (*frequentValueList)[frequentValueList->entries() - 1].getEncodedValue();
}
else
{
maxValue = uec.getValue();
}
HistogramSharedPtr emptyHist(new (HISTHEAP) Histogram(HISTHEAP));
HistInt newFirstHistInt(minValue, FALSE);
newFirstHistInt.setCardAndUec(0, 0);
HistInt newSecondHistInt(maxValue, FALSE);
newSecondHistInt.setCardAndUec(rowCount, uec);
emptyHist->insert(newFirstHistInt);
emptyHist->insert(newSecondHistInt);
ComUID id(ColStats::nextFakeHistogramID());
ColStatsSharedPtr fakeColStats(
new (HISTHEAP) ColStats(id,
uec,
rowCount,
rowCount,
FALSE,
FALSE,
emptyHist,
FALSE,
1.0,
1.0,
-1, // default avg VarChar size
HISTHEAP));
// This histogram is not an actual histogram
fakeColStats->setFakeHistogram( fakeHist);
fakeColStats->setFrequentValue(*frequentValueList);
// Setting this flag ensures that the compiler does not start
// looking for this column name in the NAColumn, as there does
// not exist a column for a constant.
fakeColStats->setVirtualColForHist ( defineVirtual );
fakeColStats->setIsCompressed (TRUE);
fakeColStats->setRowsAndUec(rowCount, uec);
if (rowCount == uec)
{
fakeColStats->setAlmostUnique(TRUE);
fakeColStats->setUnique(TRUE);
}
ColStatDescSharedPtr fakeStatDesc(new (HISTHEAP)
ColStatDesc (fakeColStats,colId), HISTHEAP);
fakeStatDesc->VEGColumn() = vegCol;
fakeStatDesc->mergeState().clear() ;
fakeStatDesc->mergeState().insert(mergeState);
this->insert(fakeStatDesc);
} // ColStatDescList::addColStatDescForVirtualCol
// ---------------------------------------------------------------------
// isPredTransformedInList
//
// subroutine of CSDL::estimateCardinality()
//
// NB: It is extremely important that this routine be tail-recursive;
// otherwise it is sure to overflow the stack for large IN-lists, which is
// the motivation for writing this code in the first place!
// ---------------------------------------------------------------------
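// For illustration, an IN-list such as col IN (1,2,3) arrives here
// transformed into a left-deep OR tree:
// ((col=1 OR col=2) OR col=3)
// The right child of every OR is a single equality, and the recursion
// descends only the left child; a tail-recursive formulation therefore
// keeps the stack depth constant regardless of IN-list size.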
NABoolean isPredTransformedInList( const ValueId & predId,
const ValueId & column,
Int32 & leaves )
{
const ItemExpr * pred = predId.getItemExpr();
OperatorTypeEnum op = pred->getOperatorType();
if ( op != ITM_OR )
return FALSE;
// first make sure the right child conforms
const ValueId & Rid = pred->child(1)->getValueId();
const ItemExpr * Rpred = Rid.getItemExpr();
const OperatorTypeEnum Rop = Rpred->getOperatorType();
if ( Rop != ITM_EQUAL )
return FALSE;
const ValueId & RidLeftChild = Rpred->child(0)->getValueId();
if ( RidLeftChild != column )
return FALSE;
// now make sure the left child conforms
const ValueId & Lid = pred->child(0)->getValueId();
const ItemExpr * Lpred = Lid.getItemExpr();
const OperatorTypeEnum Lop = Lpred->getOperatorType();
// base case : an ITM_OR between two ITM_EQUALs
if ( Lop == ITM_EQUAL )
{
const ValueId & LidLeftChild = Lpred->child(0)->getValueId();
leaves += 2;
if ( LidLeftChild == column )
return TRUE;
else
return FALSE;
}
else if ( Lop == ITM_OR ) // recursive case -- right child OK, now recurse down left child
{
leaves++;
return isPredTransformedInList( Lid, column, leaves );
}
else
return FALSE;
}
ULng32 ValueIdHashFn (const ValueId & key) { return (CollIndex) key ; }
// ------------------------------------------------------------------------
// $$$$ NOTE:
// The method ColStatDescList::estimateCardinality() can, as a side effect,
// update the unique entry count in the supplied ColStatsDescList.
//
// Also, use of ColStatDescList::applyVEGPred, or ColStatDescList::applyPred
// can cause the number of column stats for outer references to be decreased,
// altering the in/out parameter numOuterColStats.
// ------------------------------------------------------------------------
CostScalar
ColStatDescList::estimateCardinality (const CostScalar & initialRowCount,
const ValueIdSet & setOfPredicates,
const ValueIdSet & outerReferences,
const Join * expr,
const SelectivityHint * selHint,
const CardinalityHint * cardHint,
CollIndex & numOuterColStats,
ValueIdSet & unresolvedPreds,
MergeType mergeMethod,
OperatorTypeEnum exprOpCode,
CostScalar *maxSelectivity)
{
// -----------------------------------------------------------------------
// 1) estimateCardinality is computing expected cardinality by
// implicitly computing the composite selectivity of a conjunct of
// predicate terms:
// p1 AND p2 AND ... pn
// as the product of the individual selectivities:
// selectivity(p1 AND p2 AND ... pn) =
// selectivity(p1) * selectivity(p2) * ... selectivity(pn)
// 2) this computation is diffused across multiple methods which
// routinely modify histograms and/or sometimes directly manipulate
// a rowcount argument.
// 3) estimateCardinality assumes it is computing expected cardinality
// and expected cardinality alone.
// Therefore, to avoid problems such as those reported in genesis cases
// 10-080530-0291, 10-080530-0305, solution 10-080530-3538, we have to:
// a) logically separate estimateMaxSelectivity from estimateCardinality,
// b) have them work on separate copies of the same ColStatDescList.
// But, to minimize code duplication and maintenance, we have to physically
// keep only one method. So, estimateCardinality has both the expected
// cardinality and the max selectivity code.
//
// For computing maximum cardinality estimate, we need max selectivity to
// be computed as:
// maxSelectivity(p1 AND p2 and ... pn) =
// MIN(maxSelectivity(p1), maxSelectivity(p2), ... maxSelectivity(pn))
//
// The predicate conjuncts are spread across various predicate categories
// which are evaluated at different points in the code. Therefore, this MIN
// will often be seen in the code as
// maxSelectivity = MINOF( <someSelectivityExpr>, maxSelectivity )
// -----------------------------------------------------------------------
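// For illustration (hypothetical selectivities): given two conjuncts
// with selectivity(p1) = 0.1 and selectivity(p2) = 0.5, the expected
// selectivity is the product 0.1 * 0.5 = 0.05, while maxSelectivity is
// MIN(0.1, 0.5) = 0.1 -- no more rows can survive the conjunction than
// survive its most selective single term.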
// NB: we are here to compute either:
// 1) expected cardinality, or
// 2) max selectivity
// When we are doing 1) we should avoid doing 2) and vice versa.
// We are doing 1) when maxSelectivity==NULL.
// We are doing 2) when maxSelectivity!=NULL.
// -----------------------------------------------------------------------
// If a cardinality hint is given, return that, even if there are
// no predicates to be applied. This gives the user more control
// to adjust the base cardinality indirectly.
CollIndex i;
CostScalar newRowCount = MIN_ONE_CS( initialRowCount );
ValueIdSet * predsWithNoHints = new (STMTHEAP) ValueIdSet(setOfPredicates);
// Note: Can apply Default selectivity, even if no column list....
if (maxSelectivity == NULL) {
if ( setOfPredicates.entries() == 0 OR this->entries() == 0 )
{
// This is the rowcount without using hints. Save it
setScanRowCountWithoutHint(initialRowCount);
// Normally, if no cardinalityHint is given, there is not much we can
// do in the absence of predicates or histograms, so we simply return.
// But if a hint is given, then we can apply the hint on the initial
// row count and return. This is useful for cases where we want to
// uplift the base cardinality of the histograms.
if (cardHint == NULL)
return initialRowCount;
else
{
newRowCount = cardHint->getScanCardinality();
if((newRowCount.getValue() - floor(newRowCount.getValue())) > 0.00001)
newRowCount = MIN_ONE_CS(ceil(newRowCount.getValue()) );
return newRowCount;
}
}
} // maxSelectivity == NULL
// Now apply all the predicates on the histograms
CostScalar tempRowcount = MIN_ONE_CS( initialRowCount );
// all CSD's should have the same rowcount!
if (maxSelectivity == NULL)
{
// (this code in its own scope so it can easily be cut-and-pasted anywhere)
CollIndex limit = ( ( mergeMethod == SEMI_JOIN_MERGE ||
mergeMethod == ANTI_SEMI_JOIN_MERGE ) ?
numOuterColStats : entries() );
// first, check from 0..limit-1
enforceInternalConsistency( 0, limit );
// next, check from limit..entries()
enforceInternalConsistency( limit, entries() );
const CostScalar & matchRowcount =
(*this)[0]->getColStats()->getRowcount();
// try this out : if the current rowcount isn't what it's "supposed" to be,
// set it as requested and see how things go
if ( matchRowcount != newRowCount )
synchronizeStats( matchRowcount, newRowCount, limit );
} // maxSelectivity == NULL
ValueIdSet EqLocalPreds, OtherLocalPreds, EqNonLocalPreds,
OtherNonLocalPreds, BiLogicPreds, DefaultPreds;
setOfPredicates.categorizePredicates (outerReferences, EqLocalPreds,
OtherLocalPreds, EqNonLocalPreds,
OtherNonLocalPreds, BiLogicPreds,
DefaultPreds);
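// For illustration (interpretation of the categories): a VEG equality
// such as T1.a = T2.a lands in EqLocalPreds / EqNonLocalPreds
// (depending on outer references), a range predicate such as T1.a < 10
// in OtherLocalPreds / OtherNonLocalPreds, an AND / OR tree in
// BiLogicPreds, and predicates we cannot evaluate against histograms
// in DefaultPreds.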
NAHashDictionary<ValueId, CostScalar> biLogicPredReductions // <ValueId, rowred> pairs
(&(ValueIdHashFn), 11, TRUE, HISTHEAP) ;
// hash fn, initsize, uniq, heap
ValueIdSet subDefPreds;
if (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON )
{
for( ValueId subVid = DefaultPreds.init();
DefaultPreds.next( subVid );
DefaultPreds.advance( subVid ) )
{
ValueIdSet vs;
ItemExpr * pred = subVid.getItemExpr();
if ( pred->getOperatorType() == ITM_RANGE_SPEC_FUNC )
{
((RangeSpecRef *)pred)->getValueIdSetForReconsItemExpr(vs);
vs.categorizePredicates (outerReferences, EqLocalPreds,
OtherLocalPreds, EqNonLocalPreds,
OtherNonLocalPreds, BiLogicPreds,
subDefPreds);
DefaultPreds.remove(subVid);
if (subDefPreds.entries() > 0)
{
DefaultPreds += subDefPreds;
subDefPreds.clear();
}
}
} // for
} //if
// -----------------------------------------------------------------------
// Apply AND / OR logical predicates first
// -----------------------------------------------------------------------
newRowCount = applyBiLogicPred(
tempRowcount,
BiLogicPreds,
outerReferences,
expr,
selHint,
cardHint,
numOuterColStats,
unresolvedPreds,
mergeMethod,
biLogicPredReductions,
exprOpCode,
maxSelectivity
);
// If the histograms have been modified while applying OR / AND predicates,
// then they should be scaled, else we end up computing selectivity based on
// the old rowcount and uec. This happens only when there are OR / AND
// predicates in the query, since these predicates synchronize all
// histograms to have the same rowcount but do not modify the histogram
// intervals. All other predicates do not synchronize histograms until
// all the predicates have been applied, hence their aggregate rowcount is
// equal to the total rowcount from all histogram intervals. TPCH Q07 is a
// good example, where if the histograms are not scaled, it results in high
// join cardinalities.
if (maxSelectivity == NULL) {
for (i = 0; i < entries(); i++)
{
if ((*this)[i]->getColStats()->getRedFactor() != csOne)
{
ColStatsSharedPtr thisColStats = (*this)[i]->getColStatsToModify();
thisColStats->scaleHistogram(1);
}
}
} // maxSelectivity == NULL
ValueId id;
ValueIdSet nonVEGEqualPredSet;
// do all non-VEG preds together
OtherLocalPreds.insert( OtherNonLocalPreds );
for( id = OtherLocalPreds.init();
OtherLocalPreds.next( id );
OtherLocalPreds.advance( id ) )
{
ItemExpr * pred = id.getItemExpr();
if(pred->getOperatorType() == ITM_EQUAL)
{
nonVEGEqualPredSet.insert(id);
OtherLocalPreds.remove(id);
}
}
// predicate application : we do it "all at once" (previously, we used
// to re-synchronize histograms after every predicate application)
CostScalar rowcountBeforePreds = newRowCount;
// in the case of a [ANTI_]SEMI_JOIN, the outer reference columns have a
// different rowcount-before-preds :
CostScalar semiJoinRowcountBeforePreds = csZero;
if ( ( mergeMethod == SEMI_JOIN_MERGE ||
mergeMethod == ANTI_SEMI_JOIN_MERGE )
AND
( numOuterColStats < this->entries() )
AND maxSelectivity == NULL
)
{
semiJoinRowcountBeforePreds =
(*this)[numOuterColStats]->getColStats()->getRowcount();
}
// -----------------------------------------------------------------------
// Apply VEG predicates: Local and non-local
// Column stats for outer references are included in THIS.
// -----------------------------------------------------------------------
// do all VEG preds together
EqLocalPreds.insert( EqNonLocalPreds );
for( id = EqLocalPreds.init();
EqLocalPreds.next( id );
EqLocalPreds.advance( id ) )
{
tempRowcount = rowcountBeforePreds;
ItemExpr * pred = id.getItemExpr();
if ( !applyVEGPred( pred, tempRowcount, numOuterColStats,
mergeMethod, exprOpCode, maxSelectivity ) )
DefaultPreds.insert( id );
if (maxSelectivity == NULL)
newRowCount = tempRowcount;
}
// -----------------------------------------------------------------------
// Apply non-VEG equality predicates: Local and non-local
// -----------------------------------------------------------------------
for( id = nonVEGEqualPredSet.init();
nonVEGEqualPredSet.next( id );
nonVEGEqualPredSet.advance( id ) )
{
ItemExpr * pred = id.getItemExpr();
tempRowcount = rowcountBeforePreds;
if ( !applyPred( pred,
tempRowcount,
numOuterColStats,
mergeMethod,
exprOpCode,
maxSelectivity
)
)
{
DefaultPreds.insert( id );
}
else if (maxSelectivity == NULL)
newRowCount = tempRowcount;
}
// predCountSC is used to collect the number of histograms reduced after applying predicates on single
// column histograms
CollIndex predCountSC = 0;
if(exprOpCode == REL_SCAN && !maxSelectivity && (CmpCommon::getDefault(COMP_BOOL_67) == DF_ON))
predCountSC += biLogicPredReductions.entries();
// add another variable to count the number of columns reduced that should be considered for MC adjustment
CollIndex predCountMC = predCountSC;
CostScalar rowRedProduct = csOne;
CostScalar rowRedFromEquiPreds = csOne;
CostScalar rowRedAfterMCUECAdj = csOne;
CollIndex loopLimit = ( ( mergeMethod == SEMI_JOIN_MERGE ||
mergeMethod == ANTI_SEMI_JOIN_MERGE ) ?
numOuterColStats : entries() );
// ---------------------------------------------------------------------
// now we see whether we can apply multi-column uec info, in the case
// of a multi-column join between two tables
// ---------------------------------------------------------------------
// we can if:
// 1. multiple joins have been performed (isResultOfJoin() is true for >1)
// 2. we have multicolumn-uec information that matches those columns
// --> this is all handled by the subroutine
// CSDL::useMultiUecIfMultipleJoins()
//
// The following call to useMultiUecIfMultipleJoins() will adjust the
// value of newRowCount _iff_ there has been a multi-column join and
// the necessary multi-column uec information exists.
//
// NB: we may need to undo some of the adjustments done in the call to
// useMultiUecIfCorrelatedPreds() below.
// ---------------------------------------------------------------------
CollIndexList joinHistograms(STMTHEAP); // the CSDL-indices of the join histograms
if (maxSelectivity == NULL)
{
computeRowRedFactor(mergeMethod, numOuterColStats, rowcountBeforePreds, predCountSC, predCountMC, rowRedProduct);
// ---------------------------------------------------------------------
// how to calculate the resulting rowcount ...?
// ---------------------------------------------------------------------
// for a first approximation, we use the simplest formula, ignoring
// multi-column uec info
// ---------------------------------------------------------------------
rowRedFromEquiPreds = rowRedProduct;
newRowCount = rowRedProduct * rowcountBeforePreds;
this->addRecentlyJoinedCols(0, loopLimit);
// Use comp_int_40 to compute the reduction beyond which MC adjustment
// should be done. The default value for the CQD is 10, which means we
// use multi-column stats for cardinality adjustment only if the single
// column histograms reduced the rowcount below (100 - CQD)% of the
// rowcount before predicates, i.e. below 90% with the default setting.
NABoolean doMCAdjust;
double mcThreshold = (ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_40);
if (mcThreshold >= 1 && mcThreshold <= 100)
{
double adjustment = 1.0 - (mcThreshold/100);
if (newRowCount.getValue() < (rowcountBeforePreds.getValue()* adjustment))
doMCAdjust = TRUE;
else
doMCAdjust = FALSE;
}
else
{
if (newRowCount < rowcountBeforePreds)
doMCAdjust = TRUE;
else
doMCAdjust = FALSE;
}
// For semi_joins, the newRowCount cannot be greater than the left child's
// row count, and if we have already reached that row count, then we don't
// need to uplift it further using multi_column stats
if ( ( (mergeMethod != SEMI_JOIN_MERGE) &&
(mergeMethod != ANTI_SEMI_JOIN_MERGE) ) ||
doMCAdjust )
{
this->useMultiUecIfMultipleJoins(
newRowCount, /* in/out */
rowcountBeforePreds, /* in */
0, /* in: start idx */
loopLimit, /* in: end idx+1 */
joinHistograms, /* out */
expr,
mergeMethod
);
// ---------------------------------------------------------------------
// now we see whether we can apply multi-column uec info, in the case of
// predicates on 2+ columns in the table
// ---------------------------------------------------------------------
// idea: if those columns are correlated exactly or almost exactly, then
// the resulting reduction should be less than the product of the 2+
// reductions -- that is, by multiplying these two reductions together
// we'll end up removing more rows in our estimate than we should.
//
// The underlying assumption here is that the two predicates being applied
// to the two correlated columns are somehow "redundant" -- that is, both
// predicates remove some of the "same rows".
// ---------------------------------------------------------------------
// At this point we need to take a look at the predicates that have been
// applied to the various table columns. If we have applied two
// predicates to the same table, and those two predicates are correlated,
// then we need to adjust the overall reduction.
//
// The following call to useMultiUecIfCorrelatedPreds() will
// adjust the value of newRowCount _iff_ this is appropriate, as
// outlined above.
// ---------------------------------------------------------------------
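// For illustration (hypothetical reductions): if predicates on two
// perfectly correlated columns each keep 10% of the rows (say,
// city = 'X' and zipcode = 'Y'), the naive product keeps only 1%;
// since both predicates remove largely the same rows, the adjusted
// reduction should stay close to 10%.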
this->useMultiUecIfCorrelatedPreds(
newRowCount, /* in/out */
rowcountBeforePreds, /* in */
predCountMC, /* in: # preds */
joinHistograms, /* in: hists in join */
0, /* in: start idx */
loopLimit, /* in: end idx+1 */
biLogicPredReductions
);
}
rowRedAfterMCUECAdj = newRowCount/rowcountBeforePreds;
} // maxSelectivity == NULL
// -----------------------------------------------------------------------
// Apply other supported predicates: Local and non-local
// -----------------------------------------------------------------------
for( id = OtherLocalPreds.init();
OtherLocalPreds.next( id );
OtherLocalPreds.advance( id ) )
{
ItemExpr * pred = id.getItemExpr();
tempRowcount = rowcountBeforePreds;
if ( !applyPred( pred,
tempRowcount,
numOuterColStats,
mergeMethod,
exprOpCode,
maxSelectivity
)
)
{
DefaultPreds.insert( id );
}
else if (maxSelectivity == NULL)
newRowCount = tempRowcount;
}
// -----------------------------------------------------------------------
// Apply predicates with default selectivity
// -----------------------------------------------------------------------
CostScalar allPredsGlobalReduction = csOne;
for( id = DefaultPreds.init();
DefaultPreds.next( id );
DefaultPreds.advance( id ) )
{
tempRowcount = rowcountBeforePreds;
ItemExpr * pred = id.getItemExpr();
// don't repeat the application of any default predicate.
if ( NOT unresolvedPreds.contains( id ) )
{
CostScalar thisPredGlobalReduction = csOne; // initialize it just in case
applyDefaultPred( pred, thisPredGlobalReduction, exprOpCode,
maxSelectivity );
if (maxSelectivity == NULL) {
if ( thisPredGlobalReduction.getValue() > 0.0)
{
if (CostScalar(DBL_MIN)/thisPredGlobalReduction < allPredsGlobalReduction)
allPredsGlobalReduction *= thisPredGlobalReduction;
}
else
allPredsGlobalReduction = csZero;
// keep note of all predicates for which we've applied default sel
unresolvedPreds.insert( id );
} // maxSelectivity == NULL
}
}
// maxSelectivity computation is done
if (maxSelectivity) return newRowCount;
// -----------------------------------------------------------------------
// Have now applied all predicates; now figure out the correct resulting
// rowcount and normalize all of the histograms to have that rowcount.
// -----------------------------------------------------------------------
rowRedProduct = csOne;
computeRowRedFactor(mergeMethod, numOuterColStats, rowcountBeforePreds, predCountSC, predCountMC, rowRedProduct);
// -----------------------------------------------------------------------
// Reduction from equality predicates; Avoid applying reduction from equality
// predicates twice and also account for MC UEC adjustment
// -----------------------------------------------------------------------
rowRedProduct *= (rowRedAfterMCUECAdj/rowRedFromEquiPreds);
// -----------------------------------------------------------------------
// don't forget to apply the effects of global (default) predicates!
// -----------------------------------------------------------------------
rowRedProduct *= allPredsGlobalReduction;
// ---------------------------------------------------------------------
// how to calculate the resulting rowcount ...?
// ---------------------------------------------------------------------
// for a first approximation, we use the simplest formula, ignoring
// multi-column uec info
// ---------------------------------------------------------------------
newRowCount = rowRedProduct * rowcountBeforePreds;
//round any fractional rowcount up to the next whole number of rows.
if((newRowCount.getValue() - floor(newRowCount.getValue())) > 0.00001)
newRowCount = ceil(newRowCount.getValue());
newRowCount = MIN_ONE_CS(newRowCount);
// after applying MC stats, ensure that the newRowCount has not exceeded
// the left rowcount, which is the oldRowCount for [anti_]semi_join
if (mergeMethod == SEMI_JOIN_MERGE ||
mergeMethod == ANTI_SEMI_JOIN_MERGE)
{
// if after applying MC stats, the new rowcount exceeded left row count
// then max it out to left row count
if (newRowCount > rowcountBeforePreds)
newRowCount = rowcountBeforePreds;
}
// ---------------------------------------------------------------------
// We've now computed the resulting rowcount; now normalize all
// histograms to have that rowcount.
// ---------------------------------------------------------------------
for( i = 0; i < loopLimit; i++ )
{
// if this histogram already has a reduction factor applied to it,
// apply only the remaining delta (not the entire reduction again)
const CostScalar & oldCount = (*this)[i]->getColStats()->getRowcount();
if ( oldCount != rowcountBeforePreds ) // only apply the delta
(*this)[i]->synchronizeStats( oldCount, newRowCount );
else
(*this)[i]->synchronizeStats( rowcountBeforePreds, newRowCount );
}
tempRowcount = newRowCount;
// -----------------------------------------------------------------------
//
// ALMOST DONE!
//
// we now need to handle the other columns in the [ANTI_]SEMI_JOIN case
//
// "initial rowcount" == semiJoinRowcountBeforePreds
//
// -----------------------------------------------------------------------
// All the following code should be done only if we have a [anti_]semi_join
CostScalar rowred = 0;
if (mergeMethod == SEMI_JOIN_MERGE ||
mergeMethod == ANTI_SEMI_JOIN_MERGE)
{
rowRedProduct = csOne;
for( i = loopLimit; i < entries(); i++ )
{
if ( semiJoinRowcountBeforePreds.isGreaterThanZero() /* > csZero */)
rowred =
(*this)[i]->getColStats()->getRowcount() / semiJoinRowcountBeforePreds;
else
rowred = csZero;
rowRedProduct *= rowred;
}
// don't forget to apply the effects of global (default) predicates!
rowRedProduct *= allPredsGlobalReduction;
// ---------------------------------------------------------------------
// how to calculate the resulting rowcount ...?
// ---------------------------------------------------------------------
// 1. first, we use the simplest formula, ignoring multi-column uec info;
// 2. then, we take the results of that formula, and see if we can apply
// mc-uec info
// ---------------------------------------------------------------------
CostScalar newSemiJoinRowCount = rowRedProduct * semiJoinRowcountBeforePreds;
CollIndexList joinHists(STMTHEAP); //the CSDL-indices of the join histograms
this->addRecentlyJoinedCols(loopLimit, entries());
this->useMultiUecIfMultipleJoins(
newSemiJoinRowCount, /* in/out */
semiJoinRowcountBeforePreds, /* in */
loopLimit, /* in: start idx */
entries(), /* in: end idx+1 */
joinHists, /* out */
expr,
mergeMethod
);
// Similarly for columns from the outer, after applying all changes,
// we do not want the new row count to exceed the
// initial row count, which for semi_joins is actually the row count
// of the left child.
if (newSemiJoinRowCount > rowcountBeforePreds)
newSemiJoinRowCount = rowcountBeforePreds;
for( i = loopLimit; i < entries(); i++ )
{
const CostScalar & oldCount = (*this)[i]->getColStats()->getRowcount();
if ( oldCount != semiJoinRowcountBeforePreds ) // only apply the delta
(*this)[i]->synchronizeStats( oldCount, newSemiJoinRowCount );
else
(*this)[i]->synchronizeStats( semiJoinRowcountBeforePreds,
newSemiJoinRowCount );
}
#ifdef _DEBUG
// ---------------------------------------------------------------------
// After all this predicate application, the CSD's should all still have
// the same rowcount
// ---------------------------------------------------------------------
{
// (this code in its own scope so it can easily be cut-and-pasted anywhere)
CollIndex limit = ( ( mergeMethod == SEMI_JOIN_MERGE ||
mergeMethod == ANTI_SEMI_JOIN_MERGE ) ?
numOuterColStats : entries() );
// verifyInternalConsistency is quite redundant, as we do not do anything
// in case of inconsistency between the CSDs. We are much better off
// enforcing internal consistency, so verifyInternalConsistency has been
// replaced with enforceInternalConsistency.
// first, check from 0..limit-1
enforceInternalConsistency( 0, limit );
// next, check from limit..entries()
enforceInternalConsistency( limit, entries() );
if ( limit > 0 )
{
const CostScalar & matchRowcount =
(*this)[0]->getColStats()->getRowcount();
// try this out : if the current rowcount isn't what it's "supposed" to be
// set it as requested and see how things go
if ( matchRowcount != newRowCount )
synchronizeStats( matchRowcount, newRowCount, limit );
}
}
#endif
}
// done with predicate application
// --------------------------------------------------------------------
// Lastly, IF we're *NOT* the immediate child of a boolean _AND_
// operator, undo any of the ColStatDescList length reduction resulting
// from application of non-VEG-equality preds appearing in the
// expression tree.
// --------------------------------------------------------------------
if ( exprOpCode != ITM_AND )
{
NABoolean redo = FALSE;
CollIndex i = 0;
while( i < entries() )
{
CollIndex moveI = 0;
if( (*this)[i]->nonVegEquals().entries() != 0 )
{
ColStatsSharedPtr rootColStats = (*this)[i]->getColStatsToModify();
while( (*this)[i]->nonVegEquals().entries() != 0 )
{
// ----------------------------------------------------------
// This ColStatDesc has been equated to other
// ColStatDescs by a non-VEG Equality predicate.
//
// Replace the previously-removed ColStatDesc entry.
//
// One minor complication results from the possible
// presence of an Outer Join, which as can be seen in
// the above logic, doesn't remove entries from the
// inner table's ColStatDesc List.
// ----------------------------------------------------------
ColStatDescSharedPtr tmpDesc = (*this)[i]->nonVegEquals()[0];
((*this)[i]->nonVegEquals()).removeAt( 0 );
ValueId column = tmpDesc->getColumn();
NABoolean found = FALSE;
// -----------------------------------------------------------
// Don't restore a ColStatDesc that already has an existing
// entry.
// -----------------------------------------------------------
for ( CollIndex j = 0; j < entries(); j++ )
{
if ( NOT found AND column == (*this)[j]->getColumn() )
found = TRUE;
}
if ( NOT found && NOT ( tmpDesc->isFromInnerTable() &&
mergeMethod == OUTER_JOIN_MERGE ) )
{
// update the copy of the removed entry, and re-insert
ColStatsSharedPtr upColStats = tmpDesc->getColStatsToModify();
upColStats->overwrite( *rootColStats );
if( tmpDesc->isFromInnerTable() )
{
insertDeepCopyAt( numOuterColStats, tmpDesc );
if ( i > numOuterColStats )
moveI++;
}
else
{
insertDeepCopyAt( 0, tmpDesc );
numOuterColStats++;
moveI++;
}
// Gen Sol:10-090218-9369: Make sure any ColStatDescs that
// are in nonVegEquals of 'tmpDesc' are correctly put back.
if(tmpDesc->nonVegEquals().entries() != 0 )
redo = TRUE;
}
else // it's from the inner table or it's an OUTER_JOIN
{
// ----------------------------------------------------------
// In BETA when the line below wasn't commented out,
// we got a memory violation
//
// NB: if we don't remove it, optimization will use
// more memory overall, but there isn't a memory
// leak per se.
//
// Basically, it's safest to comment out the line
// below (this fixed Genesis case 10-980121-2284).
// ----------------------------------------------------------
//delete tmpDesc;
}
}
if ( moveI == 0 )
moveI++;
}
else
moveI++;
i += moveI;
if(redo)
{
i = 0;
redo = FALSE;
}
}
}
// Update MC Skew values for joins
if(expr)
{
ValueIdList joinColsGroup;
const MCSkewedValueList * joinedMCSkewedValueList = ((Join *)expr)->getMCSkewedValueListForJoinPreds(joinColsGroup);
if(joinedMCSkewedValueList)
{
if(!mcSkewedValueLists_)
mcSkewedValueLists_ = new (HISTHEAP) MultiColumnSkewedValueLists();
ValueIdList * key = new (HISTHEAP) ValueIdList(joinColsGroup);
MCSkewedValueList * value = new (HISTHEAP) MCSkewedValueList (*joinedMCSkewedValueList, HISTHEAP);
mcSkewedValueLists_->insert( key, value );
}
else if(mcSkewedValueLists_)
{
mcSkewedValueLists_->clear(TRUE);
mcSkewedValueLists_ = NULL;
}
}
// The estimated cardinality should never go below zero
if (newRowCount.getValue() < 0)
{
CCMPASSERT( newRowCount.isGreaterOrEqualThanZero() );
newRowCount.minCsZero();
}
// This is the rowcount without using hints. Save it
setScanRowCountWithoutHint(newRowCount);
// Now, after applying all the predicates, make any adjustments based on
// the hints provided by the user. See if the expression is on an index,
// and whether we can partially apply the cardinality or selectivity hint.
if (cardHint || selHint)
newRowCount = adjustRowcountWithHint(cardHint, selHint,
setOfPredicates,
newRowCount,
initialRowCount);
return newRowCount;
} // ColStatDescList::estimateCardinality()
// Adjust the rowcount based on the cardinality / selectivity / count(*) hint
CostScalar
ColStatDescList::adjustRowcountWithHint(const CardinalityHint * cardHint,
const SelectivityHint * selHint,
const ValueIdSet & setOfPredicates,
CostScalar & newRowCount,
const CostScalar & initialRowCount)
{
double selectivityHint = -1.0;
ValueIdSet localPredsFromHint;
double baseSelectivity = 1.0;
CostScalar selWithoutHint = newRowCount / initialRowCount;
if (cardHint!= NULL)
{
selectivityHint = cardHint->getScanSelectivity().getValue();
localPredsFromHint = cardHint->localPreds();
baseSelectivity = cardHint->getBaseScanSelectivityFactor();
}
else
{
selectivityHint = selHint->getScanSelectivityFactor();
localPredsFromHint = selHint->localPreds();
baseSelectivity = selHint->getBaseScanSelectivityFactor();
}
if (localPredsFromHint.entries() > 0)
{
ValueIdSet copySetOfPreds(setOfPredicates);
// Intersect the localPreds with localPredsFromHint to see
// if all predicates for which the hint was calculated are covered.
// More local predicates can appear because join predicates show up
// as selection predicates; fewer local predicates can be due to
// indexes. For fewer predicates, adjust the
// selectivity from the hint accordingly.
// get common predicates
copySetOfPreds.intersect(localPredsFromHint);
if (copySetOfPreds.entries() < localPredsFromHint.entries())
{
// The control will come here only for indexes, and by that time
// the selectivityHint would have been set, either from selectivityHint
// directly or from CardinalityHint
selectivityHint = pow(selWithoutHint.getValue(), baseSelectivity);
}
if (cardHint && (selectivityHint < 0) )
{
// this is the case when estimateCardinality has come from synthLogProp
// and is still trying to compute the selectivityHint from cardinalityHint
newRowCount = cardHint->getScanCardinality();
}
else
{
if (selectivityHint > 0.0)
{
// compute newRowCount from SelectivityHint
newRowCount = initialRowCount * selectivityHint;
}
else
newRowCount = csOne;
}
if((newRowCount.getValue() - floor(newRowCount.getValue())) > 0.00001)
newRowCount = ceil(newRowCount.getValue());
newRowCount = MIN_ONE_CS(newRowCount);
// Now, normalize all histograms to the same resultant rowcount.
for( CollIndex i = 0; i < entries(); i++ )
{
const CostScalar & oldCount =
(*this)[i]->getColStats()->getRowcount();
if( oldCount != newRowCount ) // && oldCount != 0
(*this)[i]->synchronizeStats( oldCount, newRowCount );
}
}
return newRowCount;
}
// -------------------------------------------------------------------
// ColStatDescList::getCardOfBusiestStream
// method returns the cardinality of the busiest stream for the given
// partitioning key
// -------------------------------------------------------------------
CostScalar
ColStatDescList::getCardOfBusiestStream(const PartitioningFunction* partFunc,
Lng32 numOfParts,
GroupAttributes * groupAttr,
Lng32 countOfCPUs)
{
// get the partitioning key and number of partitions
ValueIdSet partKey = partFunc->getPartitioningKey();
// get the total rows in the histogram
CostScalar rowCount = (*this)[0]->getColStats()->getRowcount();
// if number of partitions is 1, return rowcount
if ( ( numOfParts == 1) ||
( partFunc->isASinglePartitionPartitioningFunction() ) )
{
return (rowCount).minCsOne();
}
// The cardinality is based on the number of CPUs or the number of
// partitions (whichever is fewer) for a few situations:
// 1) if partitioning key is empty
// 2) the round robin partitioning scheme is used.
// 3) the skew buster partitioning scheme is used.
if ( (partKey.isEmpty()) ||
(partFunc->isASkewedDataPartitioningFunction()) ||
(partFunc->isARoundRobinPartitioningFunction()) )
{
Lng32 availableCpus = MINOF( numOfParts , countOfCPUs );
return (rowCount / availableCpus).minCsOne();
}
// In the following loop, get the min UEC from amongst the partitioning
// key to compute the number of streams. In the same loop also compute
// the accumulated frequency of the partitioning key to compute cardinality
// per stream for hash partitions
// min UEC for partitioning key
CostScalar uecForPartKey = csOne;
// accumulated freq of the partitioning key
CostScalar accFreq = csOne;
for (ValueId partKeyElement = partKey.init();
partKey.next(partKeyElement);
partKey.advance(partKeyElement) )
{
// extract all base columns from the partitioning key column before
// looking for statistics
ValueIdSet baseColSet;
// this is because findAllReferencedBaseCols is defined on ValueIdSet
ValueIdSet partKeySet(partKeyElement);
partKeySet.findAllReferencedBaseCols(baseColSet);
if (groupAttr)
{
GroupAnalysis * grpAnalysis = groupAttr->getGroupAnalysis();
if (grpAnalysis)
{
CANodeIdSet treeSet = grpAnalysis->getAllSubtreeTables();
ValueIdSet myColumns;
if (NOT treeSet.isEmpty() )
myColumns = treeSet.getUsedCols();
// from all the base columns for this partitioning key, get the
// ones that belong to me. In case of joins, the base column set would
// also contain the columns I am joining to
if (NOT myColumns.isEmpty())
baseColSet.intersectSet(myColumns);
}
}
// get minimum UEC from the given column set
CostScalar colUec = csMinusOne;
CostScalar freq = csOne;
if (CmpCommon::getDefault(COMP_BOOL_47) == DF_OFF)
{
colUec = getMinUec(baseColSet);
// get the max frequency of any column from the given column set
freq = getMaxOfMaxFreqOfCol(baseColSet);
}
else
{
ItemExpr * partKeyExpr = partKeyElement.getItemExpr();
// if partitioning key is a Case expression, then use all leaf values
// including the constants to compute max frequencies. For all
// other expressions use just the base column set
if (partKeyExpr->getOperatorType() == ITM_CASE)
{
ValueIdSet partKeyLeafValueSet;
partKeyExpr->getLeafValueIdsForCaseExpr(partKeyLeafValueSet);
colUec = getMaxUecForCaseExpr(partKeyLeafValueSet);
freq = getMaxFreqForCaseExpr(partKeyLeafValueSet);
}
else
{
colUec = getMaxUec(baseColSet);
freq = getMinOfMaxFreqOfCol(baseColSet);
}
}
if (colUec == csMinusOne)
uecForPartKey = rowCount;
else
uecForPartKey *= colUec;
accFreq *= (freq / rowCount);
} // for all partitioning key columns
// uec cannot be greater than the row count
uecForPartKey = MINOF(uecForPartKey, rowCount);
// compute the number of streams
CostScalar noOfStreams = MINOF((CostScalar)numOfParts, uecForPartKey);
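// For illustration (hypothetical values): with 32 partitions but only
// 10 distinct partitioning-key values, at most 10 streams can receive
// rows, so noOfStreams = MINOF(32, 10) = 10.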
// If partitioning key column is a Random number, and activeStreams_ is less
// than the current value of noOfStreams, then noOfStreams = activeStreams_;
CostScalar activeStreams = partFunc->getActiveStreams();
long randomFix = ActiveSchemaDB()->getDefaults().getAsLong(COMP_INT_26);
if ( (partKey.entries() == 1) AND (randomFix != 0) AND (activeStreams != 0) )
{
// Get first key column.
ValueId myPartKeyCol;
partKey.getFirst(myPartKeyCol);
// is it a random number?
if (myPartKeyCol.getItemExpr()->getOperatorType() == ITM_RANDOMNUM)
{
noOfStreams = MINOF(activeStreams, noOfStreams);
}
}
// based on the partitioning type, compute the cardinality of the stream
// For all but hash type partitions, cardinality = number of rows / # of streams
// such that # of streams = MINOF(# of partitions, UEC of partitioning key)
if (partFunc->isAHashPartitioningFunction() ||
partFunc->isATableHashPartitioningFunction() )
{
// CostScalar cardOfFreqValue = (rowCount * accFreq).minCsOne();
CostScalar cardOfFreqValue = (rowCount * accFreq);
CostScalar maxCardPerStream = (((rowCount - cardOfFreqValue)/noOfStreams) + cardOfFreqValue).round();
// cardinality per stream cannot be greater than the total row count
maxCardPerStream = MINOF(maxCardPerStream, rowCount);
// some plans are over-penalized by incorporate_skew_in_costing.
// provide a cqd HIST_SKEW_COST_ADJUSTMENT to soften the effect
// of incorporate_skew_in_costing.
// take a weighted average of the uniform distribution and the skewed
// data, based on the CQD:
// 0 -> get RC as if uniformly distributed
// 1 -> get RC as if skewed
// anything in between is the linear average
CostScalar uniformDistRowCountPerStream = rowCount / noOfStreams;
CostScalar histSkewAdjustment =
(ActiveSchemaDB()->getDefaults()).getAsDouble(HIST_SKEW_COST_ADJUSTMENT);
maxCardPerStream =
(maxCardPerStream * histSkewAdjustment) +
(uniformDistRowCountPerStream * (csOne - histSkewAdjustment));
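// Worked example (hypothetical values): rowCount = 1000,
// noOfStreams = 10, and a frequent partitioning-key value accounting
// for 200 rows: the skewed estimate is (1000-200)/10 + 200 = 280 rows
// in the busiest stream, the uniform estimate is 1000/10 = 100; with
// HIST_SKEW_COST_ADJUSTMENT = 0.5 the blend is 0.5*280 + 0.5*100 = 190.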
return maxCardPerStream.minCsOne();
}
else
{
// for all other partitions
// return rows per stream
return (rowCount / noOfStreams).minCsOne();
} // for non hash partitioning functions
} // ColStatDescList::getCardOfBusiestStream
// ColStatDescList::identifyMergeCandidates
//
// A utility routine to identify which of the entries in the given
// ColStatDescList (containing statistics associated with columns that
// are accessible in the current operator) are related to the given
// predicate.
// -----------------------------------------------------------------------
NABoolean
ColStatDescList::identifyMergeCandidates (ItemExpr *VEGpred,
CollIndex & rootStatIndex,
CollIndexList & statsToMerge) const
{
NABoolean foundRoot = FALSE;
// First, get all members of the VEG group
const VEG * predVEG = ((VEGPredicate *)VEGpred)->getVEG();
const ValueIdSet & VEGGroup = predVEG->getAllValues();
for ( CollIndex i = 0; i < entries(); i++ )
{
// 1. See if this histogram references the VEGReference id.
// 2. Keep track of all histograms that need to be merged.
// 3. Among those to merge, remember the first as the 'root' histogram.
//
// The contents of (*this)[i]->getVEGColumn() may be a VEGReference
// or an instantiate_null expression.
const ItemExpr * columnsExpr = (*this)[i]->getVEGColumn().getItemExpr();
OperatorTypeEnum colType = columnsExpr->getOperatorType() ;
// QSTUFF
// In addition to checking for the following colTypes, we shall also
// see if the histogram for this column is for a virtual column.
// This could be for cases where a virtual column was created for a
// constant expression. This is something that is expected. If there are
// other columns in this colStatDescList, then we might be able to
// find the root. If not, then the method will return FALSE, which is
// acceptable.
if ( NOT ( colType == ITM_VEG_REFERENCE
OR colType == ITM_INSTANTIATE_NULL
OR colType == ITM_VALUEIDUNION
OR colType == ITM_UNPACKCOL
OR colType == ITM_ROWSETARRAY_SCAN
OR (*this)[i]->getColStats()->isVirtualColForHist()
)
)
{
HISTWARNING("Unexpected column type");
}
// QSTUFF
// There are 4 conditions where the current histogram is of interest:
// (a) its identifying column's VEG matches the current predicate
// or is contained by a VEG_REFERENCE that is contained by
// the current predicate.
// (b) its identifying column's VEG is an instantiate_null, the
// current predicate also contains an instantiate_null, and
// the valueIds of those instantiate_nulls are identical;
// (c) its identifying column's VEG is a VALUEIDUNION
// (d) its identifying column's VEG is a ITM_ROWSETARRAY_SCAN implying
// it is a histogram created for a rowset
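// For illustration: for a join predicate T1.a = T2.a, the histograms
// for T1.a and T2.a carry the same VEGReference, so case (a) selects
// both entries and the first one found becomes the root histogram for
// the merge.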
NABoolean foundCandidate = FALSE;
// case (a) : exact match
if ( colType == ITM_VEG_REFERENCE )
{
if ( VEGpred->containsTheGivenValue( columnsExpr->getValueId() ) )
{
foundCandidate = TRUE;
}
}
else if ( colType == ITM_INSTANTIATE_NULL )
{
for ( ValueId id = VEGGroup.init();
VEGGroup.next( id );
VEGGroup.advance( id ) )
{
// case (b) : instantiate_null
if ( id.getItemExpr()->getOperatorType() == ITM_INSTANTIATE_NULL
AND colType == ITM_INSTANTIATE_NULL )
{
const ValueId & nulledVid = columnsExpr->getValueId();
if ( nulledVid == id )
{
foundCandidate = TRUE;
break;
}
}
else
{
// ITM_INDEXCOLUMN (on insert, select, drop table)
// ITM_INDEXCOLUMN, ITM_DYN_PARAM (on create table, drop table)
// ITM_BASECOLUMN (during init_sql)
// ITM_CONSTANT (on select, drop table)
continue;
}
}
}
else if ( colType == ITM_VALUEIDUNION || colType == ITM_UNPACKCOL ||
colType == ITM_ROWSETARRAY_SCAN )
{
if ( VEGpred->containsTheGivenValue( columnsExpr->getValueId() ) )
{
foundCandidate = TRUE;
}
}
if ( foundCandidate )
{
statsToMerge.insert( i );
if ( NOT foundRoot )
{
foundRoot = TRUE;
rootStatIndex = i;
}
}
}
return foundRoot;
} // ColStatDescList::identifyMergeCandidates (#1)
// -----------------------------------------------------------------------
// ColStatDescList::identifyMergeCandidates
//
// A variant routine to identify which of the entries in the given
// ColStatDescList match the given operand, which appears in a non-VEG
// equality predicate.
//
// NOTE: the return value from this function is (for some reason) always FALSE.
// -----------------------------------------------------------------------
NABoolean
ColStatDescList::identifyMergeCandidates (ItemExpr * operand,
CollIndexList & statsToMerge) const
{
OperatorTypeEnum exprType = operand->getOperatorType();
ValueId nulledVId;
ValueId id;
// walk the ColStatDescList
for ( CollIndex i = 0; i < entries(); i++ )
{
NABoolean foundCandidate = FALSE;
CollIndex nonVEGCount = ((*this)[i]->getNonVegEquals()).entries();
ItemExpr * columnsExpr = (*this)[i]->getVEGColumn().getItemExpr();
OperatorTypeEnum colType = columnsExpr->getOperatorType();
do
{
// Case 1: a VegReference that might contain an instantiate null
if ( colType == ITM_INSTANTIATE_NULL &&
exprType == ITM_VEG_REFERENCE )
{
nulledVId = columnsExpr->getValueId();
const VEG * exprVEG = ((VEGReference *)operand)->getVEG();
const ValueIdSet & VEGGroup = exprVEG->getAllValues();
for ( id = VEGGroup.init();
VEGGroup.next( id );
VEGGroup.advance( id ) )
{
if ( id.getItemExpr()->getOperatorType() == ITM_INSTANTIATE_NULL &&
nulledVId == id.getItemExpr()->getValueId() )
{
foundCandidate = TRUE;
break; // jump to outer do-while
}
}
}
// Case 2: inverse of case 1
else if ( colType == ITM_VEG_REFERENCE &&
exprType == ITM_INSTANTIATE_NULL )
{
nulledVId = operand->getValueId();
const VEG * statVEG = ((VEGReference *)columnsExpr)->getVEG();
const ValueIdSet & VEGGroup = statVEG->getAllValues();
for ( id = VEGGroup.init();
VEGGroup.next( id );
VEGGroup.advance( id ) )
{
if ( id.getItemExpr()->getOperatorType() == ITM_INSTANTIATE_NULL &&
nulledVId == id.getItemExpr()->getValueId() )
{
foundCandidate = TRUE;
break; // jump to outer do-while
}
}
}
// Case 3: two VEG References
else if ( colType == ITM_VEG_REFERENCE &&
exprType == ITM_VEG_REFERENCE )
{
const VEG * statVEG = ((VEGReference *)columnsExpr)->getVEG();
const VEG * exprVEG = ((VEGReference *)operand)->getVEG();
if ( statVEG == exprVEG )
{
foundCandidate = TRUE;
}
else // not a direct match
{
// First, get all members of the various VEG groups
const ValueIdSet & VEGGroup = exprVEG->getAllValues();
const ValueIdSet & VEGGroup2 = statVEG->getAllValues();
// look for column's valueId in a VEGREF in the operand's
// valueId set.
for ( id = VEGGroup.init();
VEGGroup.next( id );
VEGGroup.advance( id ) )
{
if ( id.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE )
{
const VEG* nestedVEG =
((VEGReference *)(id.getItemExpr()))->getVEG();
if ( statVEG == nestedVEG )
{
foundCandidate = TRUE;
break; // jump to outer do-while
}
}
}
if ( !foundCandidate )
{
// look for the operand's valueId in a VEGREF in the
// column's valueId set.
for ( id = VEGGroup2.init();
VEGGroup2.next( id );
VEGGroup2.advance( id ) )
{
if ( id.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE )
{
const VEG* nestedVEG =
((VEGReference *)(id.getItemExpr()))->getVEG();
if ( exprVEG == nestedVEG )
{
foundCandidate = TRUE;
break; // jump to outer do-while
}
}
}
}
}
}
else if (
// Case 4: two Instantiate NULLs.
// (could we look inside their operands???)
( colType == ITM_INSTANTIATE_NULL &&
exprType == ITM_INSTANTIATE_NULL )
// Case 5 : ValueIdUnions
OR ( colType == ITM_VALUEIDUNION &&
exprType == ITM_VALUEIDUNION )
// Case 6 : special case for packed col
OR ( colType == ITM_UNPACKCOL &&
exprType == ITM_UNPACKCOL )
// Case 7 : VEG_PREDs
OR ( colType == ITM_VEG_PREDICATE &&
exprType == ITM_VEG_PREDICATE )
OR ( ( CmpCommon::getDefault(COMP_BOOL_48) == DF_ON) &&
( colType == ITM_VEG_REFERENCE &&
exprType == ITM_NATYPE) ) )
{
const ValueId & statVid = columnsExpr->getValueId();
const ValueId & exprVid = operand->getValueId();
if ( statVid == exprVid )
{
foundCandidate = TRUE;
}
}
else // VEG_REF + VEG_PRED, others?
{
// $$$ this is safe, but probably not complete!
if ( columnsExpr->getValueId() == operand->getValueId() )
{
foundCandidate = TRUE;
}
}
if ( foundCandidate )
{
statsToMerge.insert( i );
}
// look for a transitive equality relationship between this column
// and any other columns, where that equality relationship is not
// represented in a VEGPred.
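// For example (illustrative): if t1.a = t2.b was applied earlier as a
// non-VEG equality, t2.b's ColStatDesc was recorded in t1.a's
// nonVegEquals_ list; the do-while below re-tests the operand against
// each of those descriptors, so the transitive match is not missed.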
if ( nonVEGCount > 0 )
{
ColStatDescSharedPtr tmpColStatDesc =
((*this)[i]->getNonVegEquals())[--nonVEGCount];
columnsExpr = tmpColStatDesc->getVEGColumn().getItemExpr();
colType = columnsExpr->getOperatorType();
}
else
columnsExpr = NULL;
}
while ( columnsExpr != NULL AND foundCandidate == FALSE );
} // for
return FALSE;
} // ColStatDescList::identifyMergeCandidates (#2)
// -----------------------------------------------------------------------
// ColStatDescList::applyVEGPred
//
// A CSDL::estimateCardinality() subroutine
//
// Given a VEG Predicate, merge all histograms that belong to the same
// equivalence group.
//
// EX 1: VEG = (T1.A, T1.B, T1.C)
// synthesize the effect of T1.A = T1.B = T1.C,
// resulting in one consolidated histogram for the VEG
//
// EX 2: VEG = (T1.A, T1.B, 10)
// synthesize the effect of T1.A = T1.B = 10,
// resulting in a degenerate histogram with a single
// interval representing the value 10.
//
// There is an assumption that all outer histograms in the ColStatDescList
// show the same number of rows and that all inner histograms in the list
// show the same number of rows. But it is not guaranteed that the number
// of rows in the outer 'table' equals the number of rows in the inner
// 'table.'
//
// ColStatDescList::mergeStats has a return value to tell the caller
// whether or not Default Selectivity should be applied (e.g., because
// mergeStats couldn't process the predicate).
// The test for setting the return value is non-obvious:
// - If there is a predicate and it can be completely applied, then don't
// tell the caller to apply default;
// - If there is a predicate and it can't be completely applied, then do
// apply default.
// - If there is no predicate that can be evaluated at run time by this
// node, then don't apply default.
//
// This is the new version of this function which facilitates applying all
// predicates at once.
// -----------------------------------------------------------------------
NABoolean
ColStatDescList::applyVEGPred (ItemExpr *VEGpred,
CostScalar & rowcount,
CollIndex & numOuterColStats,
MergeType mergeMethod,
OperatorTypeEnum opType,
CostScalar *maxSelectivity)
{
VEG * predVEG = NULL;
if ( VEGpred->getOperatorType() == ITM_VEG_PREDICATE )
predVEG = ((VEGPredicate *)VEGpred)->getVEG();
else
return FALSE ; // we did not apply the predicate
NABoolean selHintSpecified = VEGpred->isSelectivitySetUsingHint();
const ValueId & predValueId = VEGpred->getValueId();
// Get all members of the VEG group
const ValueIdSet & VEGGroup = predVEG->getAllValues();
// This is used by selectivity adjustment code.
ValueIdSet mergeStatePriorToJoin;
// retain initial guess-timated rowcount and uec, in case we can't
// improve on this...
CostScalar newRowcount = rowcount; // guesstimate of outer rows
CostScalar newUec = csOne;
CostScalar saveRowcount = rowcount;
CostScalar saveUec = csOne;
CostScalar oldRowcountForSelAdj = rowcount;
CollIndexList statsToMerge(CmpCommon::statementHeap());
CollIndex i;
CollIndex rootStatIndex = NULL_COLL_INDEX ;
ColStatDescSharedPtr rootStatDesc;
ColStatsSharedPtr rootColStats;
HistogramSharedPtr hist;
NABoolean appliedPredicateFlag = FALSE; // return flag: Success Indicator
// See if the VEG group contains a constant.
// NOTE: A VEG group should not contain more than 1 constant; but if
// that should happen, referencesAConstValue will return the first
// constant found.
ItemExpr *constant = NULL;
NABoolean containsConstant = VEGGroup.referencesAConstValue( &constant );
ItemExpr* constExprPtr = NULL;
NABoolean containsConstExpr = VEGGroup.referencesAConstExpr(&constExprPtr);
// We want the selectivity param to contribute in the same way as its
// substituted literal, in the context of apply VEG predicate. Note
// simply treating the param as a host variable does not work well
// because the selectivity that it contributes is computed from the
// interval of the base histogram. Here the ::applyXXXPred() methods
// can be called on behalf of a set of computed histograms. In that case,
// contributing "original" selectivity info can lead to incorrect
// result as demonstrated by Query 02 failure in opt/optdml03 test. Here
// the cardinality estimate for REGION table in the left part of the join
// tree (the right part is for the subquery) is way too small.
NABoolean containsSelectivityParam = (containsConstExpr AND
constExprPtr AND
constExprPtr->getOperatorType() == ITM_CACHE_PARAM AND
constExprPtr->castToSelParameter()
);
// Turn on "containsConstant" when a selectivity param is found in the
// predicate, so that the param can be used in place of the literal.
if ( containsSelectivityParam )
containsConstant = TRUE;
NABoolean containsHostvar = !containsConstant && containsConstExpr;
// Explanation of the above: there is currently no "contains a hostvar
// or dynamic parameter" function. So, we use what's available.
//
// referencesAConstValue() returns TRUE if the VEGGroup contains a constant
//
// referencesAConstExpr() returns TRUE if the VEGGroup contains a constant
// or a hostvar/dynamic parameter
//
// Thus, if referencesAConstExpr returns TRUE, but referencesAConstValue
// returns FALSE, then there's a hostvar, not a constant.
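// In tabular form (illustrative):
//   referencesAConstValue   referencesAConstExpr   meaning
//   TRUE                    TRUE                   constant
//   FALSE                   TRUE                   hostvar / dynamic param
//   FALSE                   FALSE                  neither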
// In the case of a join, the left child's histograms are at the
// beginning of the input ColStatDescList, and that fact is used
// when mergeColStats is dealing with a [anti-]semi-Join.
//
// Also note: in the case of a [anti-]semi-join, this logic depends upon
// there being no outer-table-only VEGPreds remaining in the
// input predicate (they should have been dealt with in the
// scan of the outer table).
//
// locate the entries in this ColStatDescList that are associated with
// the current VEG predicate.
NABoolean foundRoot =
identifyMergeCandidates( VEGpred, rootStatIndex, statsToMerge );
CollIndex candidateHistograms = statsToMerge.entries();
// There is no merge to perform if we don't find a root histogram.
// BUT, failure to find a root doesn't mean that there is nothing to
// do. There is only nothing to do if we didn't find any histogram.
if ( candidateHistograms == 0 )
return FALSE; // we did not apply the predicate
// If there's a hostvar, we want to apply some selectivity! So return
// FALSE, and later on, in applyDefaultPred, apply the selectivity we
// want.
if ( foundRoot && candidateHistograms == 1 && containsHostvar )
return FALSE; // we didn't apply the predicate
if ( foundRoot)
{
// Get modifiable copy of ColStats
rootStatDesc = (*this)[rootStatIndex];
rootColStats = rootStatDesc->getColStatsToModify();
if(VEGpred->isSelectivitySetUsingHint())
mergeStatePriorToJoin = rootStatDesc->getMergeState();
if (maxSelectivity == NULL) {
// Get a modifiable copy of Histogram
hist = rootColStats->getHistogramToModify();
if ( containsConstant || statsToMerge.entries() > 1 )
{
// Initialize resultant rowcount and uec to the actual
// (non-estimated) initial values.
newRowcount = rootColStats->getRowcount();
newUec = rootColStats->getTotalUec();
// save the initial aggregate information for later processing.
saveRowcount = newRowcount;
saveUec = newUec;
// Indicate that we have applied this predicate. Of course we
// haven't already applied this predicate, but we expect to,
// soon.
//
// Note that unlike for other predicates, the appliedPreds set
// is not used to prevent duplicate application of a VEG
// predicate.
//
// Different VEG predicates look identical!
rootStatDesc->addToAppliedPreds( predValueId );
}
} // maxSelectivity == NULL
} // foundRoot
// Temp fix for solution 10-100607-0915, off by default.
NABoolean tupleVirtColFix = FALSE;
if ( foundRoot && ((rootColStats->getStatColumns()).entries() == 0) &&
(CmpCommon::getDefault(COMP_BOOL_165) == DF_ON) )
tupleVirtColFix = TRUE;
// check first for " = constant " case
if ( foundRoot && containsConstant && (tupleVirtColFix == FALSE) )
{
// If the VEG references a constant or a selectivity param, then first reduce the histogram
if ( containsSelectivityParam == TRUE )
constant = ((SelParameter*)constExprPtr)->getConstVal();
EncodedValue normValue( constant, FALSE );
NABoolean neg = FALSE;
ConstValue* constExpr = constant->castToConstValue(neg);
if (maxSelectivity == NULL)
rootColStats->setToSingleValue( normValue, constExpr, &rowcount);
else
rootColStats->adjustMaxSelectivity(normValue, constExpr, &rowcount, maxSelectivity);
appliedPredicateFlag = TRUE;
if (maxSelectivity == NULL)
{
if (rootColStats->isOrigFakeHist())
{
// set the final rowcount as the square root of the baserowcount
CostScalar newRC = csOne;
if (!rootColStats->isUnique())
newRC = ceil(sqrt(rowcount.getValue()));
rootStatDesc->synchronizeStats(rootColStats->getRowcount(), newRC);
}
// update the uec and rowcount
newRowcount = rootColStats->getRowcount();
newUec = rootColStats->getTotalUec();
// $$$ WE NO LONGER WANT TO NORMALIZE HISTOGRAMS AFTER EVERY
// $$$ PREDICATE!
//
// Instead, we will normalize all of the histograms' rowcounts after
// we have applied all of the predicates.
// update the 'saved' information
saveRowcount = newRowcount;
saveUec = newUec;
} // maxSelectivity == NULL
}
// Next, check for non-root histograms to merge.
if ( foundRoot && statsToMerge.entries() > 1 ) // make sure there is work
{
// Typically, this means: For each matching histogram interval,
// # rows = (#rows A) * (#rows B) * (1/MAX(uecA, uecB))
// # uec = MIN (uecA, uecB)
// where A represents one histogram and B the other histogram
//
// However, there are non-typical cases to worry about.
// If the predicate being done is t1.a=t1.a, then the result of
// that merge is the histogram that has the smaller rowcount.
// If the predicate is one where the result of the merge t1.a=
// t1.b is now again being merged with t1.a, then the result
// is the result of the earlier merge of t1.a=t1.b
//
// In response to a possible protest that the above can't happen,
// you might be correct. But what is it that would prevent it??
// NOTE: this code assumes that all predicates that can be pushed
// down to 'identical' columns are pushed down to all copies
// of those columns.
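// A worked example of the typical case (numbers are illustrative only):
// histogram A: 1,000 rows, uecA = 100; histogram B: 500 rows, uecB = 50.
// Per matching interval: rows = 1,000 * 500 * (1/MAX(100,50)) = 5,000
//                        uec  = MIN(100, 50) = 50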
for ( i = 0; i < statsToMerge.entries(); i++ )
{
if ( statsToMerge[i] != rootStatIndex )
{
appliedPredicateFlag = TRUE;
// If the statistics to be merged are from opposite sides of
// the numOuterColStats boundary, then we are doing a
// left_table_column = right_table_column
// merge that should be done as the caller requested.
// Otherwise, use HIST_NO_STATS_UEC to compute selectivity
MergeType localMergeMethod = mergeMethod;
NABoolean joinOnOneTable = FALSE;
if ( NOT ( rootStatIndex < numOuterColStats &&
statsToMerge[i] >= numOuterColStats ) )
{
if(!containsConstant && maxSelectivity == NULL)
{
// even though the mergeMethod may never be used, we are
// initializing it here for cases when COMP_BOOL_74 is OFF
localMergeMethod = INNER_JOIN_MERGE;
joinOnOneTable = rootStatDesc->mergeColStatDescOfSameTable((*this)[statsToMerge[i]], opType);
}
else if (containsConstant)
{
// This is the case of query with selection predicates (A = 1 and B = 1)
// part of fix for genesis cases 10-080530-0291, 10-080530-0305
// this block of code is needed to correctly compute
// maxcardinality(a=1 and b=1)
// Otherwise, it is incorrectly computed as
// maxcardinality(a=b and b=1)
joinOnOneTable = TRUE;
}
}
if (containsConstant)
{
// Do not apply the predicate if it has already been applied
ValueIdSet appliedPreds = (*this)[statsToMerge[i]]->getAppliedPreds();
if (!appliedPreds.contains(predValueId))
{
EncodedValue normValue( constant, FALSE );
NABoolean neg = FALSE;
ConstValue* constExpr = constant->castToConstValue(neg);
if (maxSelectivity == NULL)
{
if(!selHintSpecified)
(*this)[statsToMerge[i]]->getColStatsToModify()->setToSingleValue
(normValue, constExpr, &rowcount);
}
else
(*this)[statsToMerge[i]]->getColStatsToModify()->
adjustMaxSelectivity (normValue, constExpr, &rowcount, maxSelectivity );
}
}
if (maxSelectivity == NULL)
{
// for outer joins, if the join VEG pred contains a constant, then
// that has already been applied, so do not do any merge with the right side now.
// For example, for queries like T1 left join T2 on T1.a = T2.a and T1.a = 1,
// the VEG pred (T2.a = 1 = VegRef(T1.a)) will appear both at the Scan (T2.a) and
// the left join node. The predicate has already been applied on rootStatDesc (join result)
// by the time control comes here, so skip the merge
if (((mergeMethod == OUTER_JOIN_MERGE) && containsConstant)
|| joinOnOneTable)
{
// The merge has already been done, so skip mergeColStatDesc
}
else
{
// else merge the left and the right histograms
rootStatDesc->mergeColStatDesc ((*this)[statsToMerge[i]],
localMergeMethod,
FALSE, // don't force merge
opType
);
}
// get the aggregate results following the latest merge
newRowcount = rootColStats->getRowcount();
newUec = rootColStats->getTotalUec();
// $$$ WE NO LONGER WANT TO NORMALIZE HISTOGRAMS AFTER EVERY
// $$$ PREDICATE!
//
// Instead, we will normalize all of the histograms' rowcounts after
// we have applied all of the predicates.
// update the 'saved' information
saveRowcount = newRowcount;
saveUec = newUec;
} // maxSelectivity == NULL
}
}
}
if (maxSelectivity == NULL) {
// For histograms that belong to the same equivalence class:
// IF this is not an Outer Join, keep only the root
// IF this is an Outer Join, the original histograms need to be
// replaced by copies of the root; they will be null-augmented.
if ( !containsConstant && statsToMerge.entries() > 1 )
{
// Walk the list backwards to avoid invalidating entries in the list
// statsToMerge.
// NB: i is unsigned, so the loop condition i >= 0 is always true;
// the loop terminates only via the explicit break at i == 0 below.
for ( i = statsToMerge.entries() - 1; i >= 0; i-- )
{
if (statsToMerge[i] != rootStatIndex)
{
if ( mergeMethod == OUTER_JOIN_MERGE &&
statsToMerge[i] >= numOuterColStats )
{
ColStatsSharedPtr updateColStats =
(*this)[statsToMerge[i]]->getColStatsToModify();
updateColStats->overwrite( *rootColStats );
}
else
{
this->remove( (*this)[statsToMerge[i]] );
// maintain the numOuterColStats value when removing
// column statistics from the outer table's list.
if ( statsToMerge[i] < numOuterColStats )
numOuterColStats--;
}
}
// Since i is unsigned, it can never become negative, so the i >= 0
// condition above can never fail; the break below is what actually
// terminates the loop once entry 0 has been processed.
if ( i == 0 )
break ;
} // for
} // if
// return the [altered] rowcount
rowcount = newRowcount;
// If no merge could be performed (due to unavailable columns: NOTE
// we cannot distinguish between run-time unavailability of columns
// and optimizer unavailability of column statistics), instruct the
// caller to not apply default selectivity.
//
// If the number of candidate histograms is greater than the number
// actually merged (which should never happen, because we removed
// multi-column histograms ... oh well), then tell the caller to apply
// default selectivity ==> even if we *did* do some merging.
//
// ** special case: a VEG containing a constant: we'll do enough
// ** reduction here that default selectivity shouldn't also be applied
if ( candidateHistograms <= 1 )
appliedPredicateFlag = TRUE;
else if ( candidateHistograms > statsToMerge.entries() &&
!containsConstant )
appliedPredicateFlag = FALSE;
} // maxSelectivity == NULL
// If user specified selectivity for this predicate, we need to make
// adjustment in reduction to reflect that.
if ((containsConstant || statsToMerge.entries() > 1)
&& selHintSpecified)
{
ItemExpr * tempPred = NULL;
ItemExpr * selPred = NULL;
NABoolean neg = FALSE;
ValueIdSet baseCols;
// If VEG predicate already has selectivity specified, use it directly.
if(VEGpred->getSelectivityFactor() != -1)
selPred = VEGpred;
ValueIdSet mergeState = rootStatDesc->getMergeState();
ValueIdSet predSet = ((VEGPredicate *)VEGpred)->getPredsWithSelectivities();
for ( ValueId predId = predSet.init();
predSet.next( predId );
predSet.advance( predId ) )
{
if(selPred)
break;
tempPred = predId.getItemExpr();
if(containsConstant)
{
// This is for scenario where we have just applied a local predicate
// (eg. 't1.a = 5') and have a hint on that predicate.
if((mergeState == ValueIdSet(tempPred->child(0))) &&
(tempPred->child(1)->castToConstValue(neg)))
selPred = tempPred;
else
continue;
}
else
{
baseCols.clear();
tempPred->findAll(ITM_BASECOLUMN, baseCols, TRUE, TRUE);
// This is the scenario where we are dealing with the first join
if(mergeState == baseCols)
selPred = tempPred;
else if(mergeState.contains(baseCols))
{
// If the query has multiple tables joined on the same column resulting in VEG.
// Eg, t1.a = t2.b and t1.a = t3.c and t1.a = t4.d => VEGPred(t1.a, t2.b, t3.c,t4.d)
// We need to get the correct predicate from the set of predicates in the VEG predicate.
ValueIdSet newColumnMerged = mergeState;
newColumnMerged.subtractSet(mergeStatePriorToJoin);
if(baseCols.contains(newColumnMerged))
{
ValueIdSet secondColumnInTheMerge = baseCols;
secondColumnInTheMerge.subtractSet(newColumnMerged);
if(mergeStatePriorToJoin.contains(secondColumnInTheMerge))
selPred = tempPred;
}
else
continue;
}
else
continue;
}
}
if(selPred)
rootStatDesc->applySelIfSpecifiedViaHint(selPred, oldRowcountForSelAdj);
}
return appliedPredicateFlag;
} // applyVEGPred
// -----------------------------------------------------------------------
// ColStatDescList::applyBiLogicPred
//
// A CSDL::estimateCardinality() subroutine
//
// -----------------------------------------------------------------------
CostScalar
ColStatDescList::applyBiLogicPred(CostScalar & tempRowcount,
ValueIdSet & BiLogicPreds,
const ValueIdSet & outerReferences,
const Join * expr,
const SelectivityHint * selHint,
const CardinalityHint * cardHint,
CollIndex & numOuterColStats,
ValueIdSet & unresolvedPreds,
MergeType mergeMethod,
NAHashDictionary<ValueId, CostScalar> & biLogicPredReductions, // in/mod
OperatorTypeEnum exprOpCode,
CostScalar *maxSelectivity)
{
ValueId id;
if (BiLogicPreds.entries() == 0) return tempRowcount;
CostScalar newRowCount = MIN_ONE_CS( tempRowcount );
// maxSelectivity(p1 AND p2) == MIN(maxSelectivity(p1), maxSelectivity(p2))
// maxSelectivity(p1 OR p2) == MIN(1, maxSelectivity(p1)+maxSelectivity(p2))
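// e.g. (illustrative): with maxSelectivity(p1) = 0.2 and
// maxSelectivity(p2) = 0.5:
//   p1 AND p2 -> MIN(0.2, 0.5) = 0.2
//   p1 OR  p2 -> MIN(1, 0.2 + 0.5) = 0.7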
CostScalar origRowCount = tempRowcount;
for( id = BiLogicPreds.init();
BiLogicPreds.next( id );
BiLogicPreds.advance( id ) )
{
ItemExpr * pred = id.getItemExpr();
ValueIdSet leftChildSet ( pred->child(0)->getValueId() );
ValueIdSet rightChildSet( pred->child(1)->getValueId() );
OperatorTypeEnum op = pred->getOperatorType();
NABoolean isLike = ((BiLogic*)pred)->isLike();
if(isLike && maxSelectivity)
{
*maxSelectivity = 1.0;
break;
}
NABoolean doEstCard = (maxSelectivity==NULL);
origRowCount = tempRowcount;
if ( op == ITM_AND )
{
CostScalar maxSel1 = 1.0, maxSel2 = 1.0;
// Apply both left and right predicates to the given ColStatDescList
newRowCount = estimateCardinality(tempRowcount,
leftChildSet,
outerReferences,
expr,
selHint,
cardHint,
numOuterColStats,
unresolvedPreds,
mergeMethod,
ITM_AND,
doEstCard ? NULL : &maxSel1
);
tempRowcount = newRowCount;
newRowCount = estimateCardinality(tempRowcount,
rightChildSet,
outerReferences,
expr,
selHint,
cardHint,
numOuterColStats,
unresolvedPreds,
mergeMethod,
ITM_AND,
doEstCard ? NULL : &maxSel2
);
tempRowcount = newRowCount;
// maxSelectivity(p1 AND p2)==MIN(maxSelectivity(p1),maxSelectivity(p2))
if (!doEstCard)
{
*maxSelectivity = MINOF(*maxSelectivity, MINOF(maxSel1, maxSel2));
}
}
else if ( op == ITM_OR )
{
// special case: we'd like to detect OR-trees which result from
// large IN-lists
//
// we will try to detect such an IN-list below; if this predicate is
// such an IN-list, then we'll handle the histogram-transformation
// a little simpler than what we usually do ...
NABoolean large_in_list = FALSE;
const ValueId & Lid = pred->child(0)->getValueId();
const ItemExpr * Lpred = Lid.getItemExpr();
const OperatorTypeEnum Lop = Lpred->getOperatorType();
const ValueId & Rid = pred->child(1)->getValueId();
const ItemExpr * Rpred = Rid.getItemExpr();
const OperatorTypeEnum Rop = Rpred->getOperatorType();
ValueId RidLeft;
NABoolean inListCase = TRUE;
if ( Rop == ITM_EQUAL) // don't look at child if not correct operator!
{
RidLeft = Rpred->child(0)->getValueId();
}
else
{
inListCase = FALSE;
}
Int32 in_list_members = 1; // count the first right child (above)
if ( (
Lop == ITM_OR
AND Rop == ITM_EQUAL
AND
// at this point we're reasonably sure that we're inside an OR-tree
// that was transformed from a (large) IN-list; we now call a
// function to check this carefully ...
isPredTransformedInList ( Lid, RidLeft, in_list_members )
AND
// $$$ NB: we ARBITRARILY set the number of IN-list members
// $$$ for which we try to carefully perform *EXACT*
// $$$ histogram manipulation
in_list_members > HIST_MAX_IN_LIST_MEMBERS
AND
// just in case the constant/default has a dumb value
in_list_members > 5
) ||
((Lop == ITM_OR )
AND
(inListCase == FALSE)
AND
(((BiLogic*)pred)->getNumLeaves() > HIST_MAX_IN_LIST_MEMBERS))
)
{
large_in_list = TRUE;
if (maxSelectivity == NULL)
{
// now do the necessary histogram-manipulation ...
//
// first we find the histogram which matches the IN-list;
// then we do some manipulation
if (RidLeft == NULL_VALUE_ID)
return newRowCount;
ItemExpr * RidLeftExpr = RidLeft.getItemExpr();
OperatorTypeEnum RidLeftOp = RidLeftExpr->getOperatorType();
while ((RidLeftOp != ITM_INDEXCOLUMN) &&
(RidLeftOp != ITM_INSTANTIATE_NULL) &&
(RidLeftOp != ITM_BASECOLUMN) &&
(RidLeftOp != ITM_VEG_REFERENCE) )
{
// Do not assume that the left child of EQUAL will always be a column;
// it can be a SUBSTRING too, with the column CASTed.
if ( (RidLeftExpr->child(0)) &&
(RidLeftExpr->child(0)->getOperatorType() != ITM_CONSTANT) )
RidLeft = RidLeftExpr->child(0)->getValueId();
else
{
// for TRIM like functions, the left child is a constant while
// the right child is a column.
if (RidLeftExpr->child(1) )
RidLeft = RidLeftExpr->child(1)->getValueId();
else
{
// There is no column to apply predicate, hence return newRowCount
return newRowCount;
}
}
RidLeftExpr = RidLeft.getItemExpr();
RidLeftOp = RidLeftExpr->getOperatorType();
}
if (RidLeft == NULL_VALUE_ID)
return newRowCount;
CollIndex histIdx;
if ( NOT this->getColStatDescIndexForColumn( histIdx,RidLeft ) )
{
//10/15/2004
//
//Initially we used to assert here; the following 'if' was missing.
//The assertion fired because even though RidLeft was covered by
//the node to which this ColStatDescList was attached, the node
//did not produce any column in the RidLeft VEG (i.e. if RidLeft
//represented a VEGREF). This is possible if the VEG contains a
//constant; in such a case the VEGREF is covered by any node, since
//any node can produce a constant. To cover such a VEG the node
//does not need to produce any columns in the VEG.
//This assertion was being hit for a valid query
//Consider the scenario
//create table t1(x integer);
//create table t2(y integer);
//create table t3(z integer);
//display
//select *
//from t3, t2, t1
//where t1.x in (1, 2, 3, 4, 5, 6, 7, 8, 10 ,
// 11 ,12 ,13, 14, 15 ,16, 17 ,18, 19, 20,
// 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
// 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
// 41, 42)
// and
// t1.x = t2.y
// and
//t2.y = 0;
//
//The code we are in is called when we have a large in
//list (>42 items)
//
//this could happen if we get a VEGREF (i.e. RidLeft is a VEGREF)
//that contains a constant. In such a case, the VEG will be considered
//covered as it contains a constant and could be pushed down to a
//scan that does not produce any columns in the VEG.
// Keep a check here; this assertion may no longer be relevant. If so,
// remove the following code.
//if RidLeft is a VEGREF
if(RidLeft.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE)
{
//Get the VEG referred by RidLeft
VEG * RidLeftVEG = ((VEGReference*) RidLeft.getItemExpr())->getVEG();
//Get the set of valueIds in RidLeftVEG
ValueIdSet RidLeftSet;
RidLeftVEG->getAndExpandAllValues(RidLeftSet);
//check if the VEG contains a constant
NABoolean containsConstant = RidLeftSet.referencesAConstExpr();
//If VEG contains a constant, then RidLeft is covered
//by this node (i.e. node to which this ColStatDescList is attached)
//even if it does not produce any of the columns in the VEG.
if(containsConstant)
return newRowCount;
}
//This should not happen and so assert in a
//debug build
// could not find a matching histogram ... huh?!
// CCMPASSERT( FALSE ); // this should not happen!
//CCMPASSERT is only compiled for debug builds
//Therefore exit gracefully for a release build
//Since this is a non-fatal error
return newRowCount;
}
else
{
ColStatDescSharedPtr predColumn = (*this)[histIdx];
ColStatsSharedPtr predStats = predColumn->getColStatsToModify();
if (inListCase && (predStats->getTotalUec() <= in_list_members ) )
{
// do nothing : assume no uec/rows are removed by
// application of this large in-list
}
else
{
CostScalar redFactor = csOne;
if (inListCase)
{
// Set the histogram's new totalUec to be the
// number of in-list members, adjusting the
// rowcount by the same proportion
redFactor = (CostScalar( in_list_members ) / predStats->getTotalUec()).maxCsOne();
}
else
{
// it is a large OR predicate, but not derived from IN
// list. Instead it could be a set of AND and ORs
redFactor = (CostScalar (((BiLogic*)pred)->getNumLeaves()) / predStats->getTotalUec()).maxCsOne();
}
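// e.g. (illustrative numbers): an IN-list of 60 members against a column
// whose totalUec is 300 gives redFactor = MIN(60/300, 1) = 0.2 (maxCsOne()
// caps the factor at one), so a 10,000-row histogram scales down to 2,000
// rows below.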
newRowCount =
predStats->getRowcount() * redFactor;
predStats->scaleHistogram(redFactor, redFactor);
predStats->setBaseUec(predStats->getTotalUec());
// after this approximation, we no longer completely "trust"
// this histogram ...
predStats->setFakeHistogram( TRUE );
// Now, normalize all histograms to the same resultant rowcount.
for( CollIndex i = 0; i < entries(); i++ )
{
const CostScalar & oldCount =
(*this)[i]->getColStats()->getRowcount();
if( oldCount != newRowCount ) // && oldCount != 0
(*this)[i]->synchronizeStats( oldCount, newRowCount );
}
// update tempRowcount, which is being used later as the starting
// cardinality for the remaining bi-logic predicates
tempRowcount = newRowCount;
}
}
} // maxSelectivity == NULL
} // large_in_list case handled
// ----------------------------------------------------------------------
// If this predicate is simply a large OR-tree that resulted
// from an IN-list transformation, then we've already handled
// that case above, so we're done applying this predicate
// ----------------------------------------------------------------------
if ( NOT large_in_list )
{
// the following two lists, one of ValueId's, one of char *'s, are used
// to keep track of the TRUE shape-changed flag's of this OR-node's
// parents -- assuming the parent is not also an OR
//
// this is needed because when we look at the children of the OR,
// we set the shape-changed flags for all columns to be FALSE
//
// if we're to do the right thing, we will need to know later on
// whether a column was shape-changed before it was passed to this OR node
ValueIdList shapeChangedItemExprs;
NAList<ComUID> shapeChangedHistIds(CmpCommon::statementHeap());
CollIndex dictSize = MIN_ONE(this->entries());
NAHashDictionary<ValueId, ValueIdSet> priorAppliedPredsSet (
&(ValueIdHashFn), dictSize, TRUE, HISTHEAP );
NAHashDictionary<ValueId, ValueIdSet> priorMergeStateSet (
&(ValueIdHashFn), dictSize, TRUE, HISTHEAP );
// Make a copy of the current input ColStatDescList THIS;
// Apply the left predicate to THIS;
// Apply the right predicate to the manufactured copy;
// OR the two lists together, into THIS...
// carefully, because the two lists no longer have to be the
// same length;
// Discard the copy (it'll go away since it's a stack variable)
ColStatDescList copyStatsList( HISTHEAP );
CollIndex i;
copyStatsList.makeDeepCopy (
*this,
1, // scale remains unchanged
FALSE // clear shapeChanged flag
);
// for OR's, clear THIS's shapeChanged_ flag (copyStatsList cleared above)
if (maxSelectivity == NULL) {
for ( i = 0; i < entries(); i++ )
{
ColStatDescSharedPtr copyStatDesc = (*this)[i];
ColStatsSharedPtr copyStats = copyStatDesc->getColStatsToModify();
// need to store these shape-changed flags in order to keep
// track of all the information that our parent gave us!
// --> we don't do this if our parent is an OR
if ( exprOpCode != ITM_OR )
{
const ValueId & saveExpr = copyStatDesc->getVEGColumn().getItemExpr()->getValueId();
if ( copyStats->isShapeChanged() )
{
shapeChangedItemExprs.insert( saveExpr );
shapeChangedHistIds.insert( copyStats->getHistogramId() );
}
if (!copyStatDesc->getAppliedPreds().isEmpty())
{
ValueId * key = new (HISTHEAP) ValueId( saveExpr );
ValueIdSet * keyValue = new (HISTHEAP) ValueIdSet( copyStatDesc->getAppliedPreds() );
priorAppliedPredsSet.insert(key, keyValue);
}
if (copyStatDesc->getMergeState() != copyStatDesc->getColumn())
{
ValueId * key = new (HISTHEAP) ValueId( saveExpr );
ValueIdSet * keyValue = new (HISTHEAP) ValueIdSet( copyStatDesc->getMergeState() );
priorMergeStateSet.insert(key, keyValue);
}
} // now we've saved the ItemExpr / Histogram Id for every
// column that was shape-changed
copyStats->setShapeChanged( FALSE ); // for OR'ing, init clear.
}
} // maxSelectivity == NULL
// track information for probabilistic newRowCount determination
CostScalar saveTempRowCount = tempRowcount;
tempRowcount = MIN_ONE_CS( tempRowcount ); // prevent division by Zero!
// Note: if we pass numOuterColStats to the two
// estimateCardinality() calls below, it may
// get changed to a value greater than the number of entries
// in copyStatsList after the first call.
// When that happens and we pass it to the second call
// below, it will cause an assertion failure.
// Copies of numOuterColStats to be applied below.
CollIndex leftNumOuterColStats = numOuterColStats;
CollIndex rightNumOuterColStats = numOuterColStats;
ValueIdSet originalUnResolved = unresolvedPreds;
CostScalar maxSel1 = 1.0, maxSel2 = 1.0;
CostScalar leftRowCount = estimateCardinality(
tempRowcount,
leftChildSet,
outerReferences,
expr,
selHint,
cardHint,
leftNumOuterColStats,
unresolvedPreds,
mergeMethod,
ITM_OR,
doEstCard ? NULL :
&maxSel1
);
CostScalar prob1, prob2;
prob1 = leftRowCount / tempRowcount;
CostScalar rightRowCount = copyStatsList.estimateCardinality(
tempRowcount,
rightChildSet,
outerReferences,
expr,
selHint,
cardHint,
rightNumOuterColStats,
originalUnResolved,
mergeMethod,
ITM_OR,
doEstCard ? NULL :
&maxSel2
);
prob2 = rightRowCount / tempRowcount;
// maxSelectivity(p1 OR p2)==MIN(1,maxSelectivity(p1)+maxSelectivity(p2))
if (!doEstCard)
{
*maxSelectivity = MINOF(*maxSelectivity, MINOF(maxSel1+maxSel2, 1.0));
}
if (maxSelectivity == NULL)
{
// since both left and right colStats have been initialized from the
// same list of histograms, they should have an equal number of entries.
// If not, something went wrong, so ignore this predicate rather than
// failing in the field.
if (leftNumOuterColStats != rightNumOuterColStats )
{
CCMPASSERT( leftNumOuterColStats == rightNumOuterColStats );
return newRowCount;
}
numOuterColStats = leftNumOuterColStats;
// Under unusual conditions, prob1 and prob2 can be greater than
// one! (i.e., if we have a long in-list predicate applied to a
// histogram that only has a few rows). So, in other words, the
// application of some predicates can result in an increase in
// estimated rowcount!
//
// However, the calculation for newRowCount below (involving
// p1+p2-p1*p2) will not produce a reasonable result if we're
// not using probabilities! It makes absolutely no sense to
// talk about a "probability" that's >1!
//
// So, in the case where prob1 or prob2 is >1, we need to
// scale down the corresponding CSDL to have the original
// rowcount, then set prob1/prob2 to be 1. We don't want
// predicate application to ever increase rowcounts!
if ( prob1 > csOne )
{
// scale down the histograms
for ( CollIndex i = 0; i < this->entries(); i++ )
(*this)[i]->synchronizeStats( leftRowCount, tempRowcount );
// Don't forget to reset these two values!
leftRowCount = tempRowcount;
prob1 = csOne;
}
if ( prob2 > csOne )
{
for ( CollIndex i = 0; i < copyStatsList.entries(); i++ )
(copyStatsList)[i]->synchronizeStats( rightRowCount, tempRowcount );
rightRowCount = tempRowcount;
prob2 = csOne;
}
double prob1prob2 = prob1.value() * prob2.value();
// estimate for case where histogram merges are not possible.
newRowCount = saveTempRowCount *
( prob1 + prob2 - CostScalar(prob1prob2) );
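// e.g. (illustrative): with prob1 = 0.3, prob2 = 0.4 and
// saveTempRowCount = 1,000 rows:
//   newRowCount = 1,000 * (0.3 + 0.4 - 0.3*0.4) = 580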
// overLappedRowcount first was saveTempRowCount;
// then, it was leftRowCount + rightRowCount
// But, the real maximum amount of overlap between two histograms
// is determined by the smaller side's rowcount.
CostScalar overLappedRowcount = MINOF( leftRowCount, rightRowCount );
// predicates are only completely un-resolved when they are
// un-resolved on Both sides of the OR.
unresolvedPreds.intersectSet( originalUnResolved );
// ---------------------------------------------------------------
// walk over every entry in the updated THIS.
// Find its match in the corresponding list.
// An entry matches when the first columns match and the histogram ID
// matches.
// -----------------------------------------------------------------
// Did they change consistently?
NABoolean bothChangedWheneverOneChanged = TRUE;
// Did at least one change?
NABoolean atLeastOneChanged = FALSE;
ValueIdSet matchedRight;
CollIndex currentL, currentR;
if( this->entries() != copyStatsList.entries() )
{
// copyStatsList is a copy of THIS statsList; during
// the process of applying predicates to THIS and
// the copy, we should not have dropped any columns.
// (debugging hook: a convenient spot for a breakpoint)
Int32 stophere = 0;
}
for( currentL = 0; currentL < entries(); currentL++ )
{
const ItemExpr * leftItem =
(*this)[currentL]->getVEGColumn().getItemExpr();
ComUID leftHistId =
(*this)[currentL]->getColStats()->getHistogramId();
NABoolean found = FALSE;
currentR = 0;
while( NOT found && currentR < copyStatsList.entries() )
{
const ItemExpr * rightItem =
(copyStatsList)[currentR]->getVEGColumn().getItemExpr();
ComUID rightHistId =
(copyStatsList)[currentR]->getColStats()->getHistogramId();
if( leftItem->getValueId() == rightItem->getValueId() &&
(leftHistId == rightHistId) )
found = TRUE;
else
currentR++;
}
if (found == FALSE)
{
// if we could not find the histogram to be merged,
// return (in release mode); a side effect could be
// increased cardinality
CCMPASSERT( found == TRUE );
return newRowCount;
}
ColStatsSharedPtr leftColStats =
(*this)[currentL]->getColStatsToModify();
ColStatsSharedPtr rightColStats =
(copyStatsList)[currentR]->getColStatsToModify();
// If either side has changed, perform an OR merge.
// If both sides have shape changed, find how much rowcount
// was overlapped between the two.
// Save the smallest amount of overlap and use it to
// subtract from the sum of the two rowcounts when all
// shape changes come in pairs.
// neither changed
if( NOT leftColStats->isShapeChanged() &&
NOT rightColStats->isShapeChanged() )
{
copyStatsList.removeAt( currentR );
}
else // at least one side shape-changed.
{
atLeastOneChanged = TRUE;
CostScalar saveRowcount =
leftColStats->getRowcount() + rightColStats->getRowcount();
(*this)[currentL]->mergeColStatDesc(
(copyStatsList)[currentR],
OR_MERGE,
FALSE,
exprOpCode
);
// for debugging informational value, track the
// histograms impacted by an OR.
(*this)[currentL]->appliedPreds().insert( id );
if(rightColStats->isSelectivitySetUsingHint())
leftColStats->setSelectivitySetUsingHint();
// both sides changed?
if ( leftColStats->isShapeChanged() &&
rightColStats->isShapeChanged() )
{
CostScalar curDif = saveRowcount -
(*this)[currentL]->getColStats()->getRowcount();
if (curDif < overLappedRowcount)
overLappedRowcount = curDif;
}
else // only one side changed!
{
bothChangedWheneverOneChanged = FALSE;
}
copyStatsList.removeAt( currentR );
}
} // for (currentL)
if( copyStatsList.entries() != 0 )
{
CCMPASSERT (copyStatsList.entries() == 0);
// The OR merge between THIS and the copy did not go properly
return newRowCount;
}
// Determine the resultant rowcount. If not all columns overlapped,
// use the previously determined rowcount
// (i.e. rowcount * (prob1 + prob2 - prob1 * prob2));
// otherwise just use leftRows + rightRows - overlappedRows.
if( atLeastOneChanged && bothChangedWheneverOneChanged )
newRowCount = leftRowCount + rightRowCount - overLappedRowcount;
newRowCount = MINOF(newRowCount, origRowCount);
// If user specified selectivity for this predicate, we need to make
// adjustment in reduction to reflect that.
if((exprOpCode == REL_SCAN) &&
(pred->isSelectivitySetUsingHint()))
{
ColStatDescSharedPtr firstColStatDesc = (*this)[0];
firstColStatDesc->applySelIfSpecifiedViaHint(pred, origRowCount);
newRowCount = firstColStatDesc->getColStats()->getRowcount();
}
tempRowcount = newRowCount;
// Now, normalize all histograms to the same resultant rowcount.
for( i = 0; i < entries(); i++ )
{
const CostScalar & oldCount =
(*this)[i]->getColStats()->getRowcount();
if( oldCount != newRowCount ) // && oldCount != 0
(*this)[i]->synchronizeStats( oldCount, newRowCount );
}
if( exprOpCode != ITM_OR )
// now, restore our parents' TRUE shape-changed flags after we come out
// of the OR -- we lost this information when we started the OR and
// set all shape-changed flags to be FALSE
// ** this has to be so complicated because the number of columns
// ** in THIS may have been reduced by the right child
// ** --> or does this not have to be as cautious as it is?
{
for( i = 0; i < shapeChangedItemExprs.entries(); i++ )
{
NABoolean found = FALSE;
for( CollIndex j = 0; j < entries(); j++ )
{
const ValueId & checkExpr =
(*this)[j]->getVEGColumn().getItemExpr()->getValueId();
ComUID checkHistId =
(*this)[j]->getColStats()->getHistogramId();
NABoolean equalExprs = ( shapeChangedItemExprs[i] == checkExpr );
NABoolean equalHistIds = ( shapeChangedHistIds[i] == checkHistId );
if ( equalExprs && equalHistIds )
{
found = TRUE;
(*this)[j]->getColStatsToModify()->setShapeChanged(TRUE);
break;
}
}
CCMPASSERT( found == TRUE );
}
// Restore the original appliedPred and mergeState sets which were removed
// before applying OR-related join predicates
for( CollIndex j = 0; j < entries(); j++ )
{
(*this)[j]->appliedPreds().clear();
(*this)[j]->mergeState().clear();
const ValueId & checkExpr = (*this)[j]->getVEGColumn().getItemExpr()->getValueId();
if(priorAppliedPredsSet.contains(&checkExpr))
(*this)[j]->appliedPreds().insert(*(priorAppliedPredsSet.getFirstValue(&checkExpr)));
if(priorMergeStateSet.contains(&checkExpr))
{
// At Join-level, we will have histograms from both sides with the same VEG column.
// We need to make sure that the valueId of the column is part of saved mergeState
// before copying the mergeState.
ValueIdSet mergeState = *(priorMergeStateSet.getFirstValue(&checkExpr));
if(mergeState.contains((*this)[j]->getColumn()))
(*this)[j]->mergeState().insert(mergeState);
else
(*this)[j]->mergeState().insert((*this)[j]->getColumn());
}
else
(*this)[j]->mergeState().insert((*this)[j]->getColumn());
}
}
} // maxSelectivity == NULL
} // NOT large_in_list
} // ITM_OR
else // error: It isn't ITM_AND or ITM_OR ... oops
{
CCMPASSERT("Unsupported binary logic operator" );
return newRowCount;
}
if(exprOpCode == REL_SCAN && !maxSelectivity && (CmpCommon::getDefault(COMP_BOOL_67) == DF_ON))
{
ValueIdSet predSet;
pred->getLeafPredicates(predSet);
NABoolean skip = FALSE;
ItemExpr * column = NULL;
ItemExpr *lhs = NULL;
for( ValueId tempId = predSet.init();
predSet.next( tempId );
predSet.advance( tempId ) )
{
ItemExpr *tempExpr = tempId.getItemExpr();
if (tempExpr->getArity() > 0)
lhs = tempExpr->child(0);
else
lhs = NULL;
if (!tempExpr->equatesToAConstant() ||
(column && (lhs != column)) ||
(!lhs))
{
skip = TRUE;
break;
}
else if(!column)
column = lhs;
}
if(skip)
continue;
CollIndex i;
column = column->getLeafValueIfUseStats();
ValueId colId = column->getValueId();
const BaseColumn * baseCol = colId.castToBaseColumn();
if(!baseCol)
continue;
else
colId = baseCol->getValueId();
if(!getColStatDescIndexForColumn(i, colId))
continue;
//calculate the reduction from this predicate
CostScalar rowCnt = (*this)[i]->getColStats()->getRowcount();
if ( rowCnt < origRowCount )
{
ValueId * key = new (HISTHEAP) ValueId(colId);
CostScalar * value = new (HISTHEAP) CostScalar( rowCnt / origRowCount );
ValueId * result = biLogicPredReductions.insert( key, value );
}
}
}
return newRowCount;
} // ColStatDescList::applyBiLogicPred
// -----------------------------------------------------------------------
// ColStatDescList::applyPred
//
// A CSDL::estimateCardinality() subroutine
//
// This method applies the effect of a predicate on the set of column
// statistics. This method supports the unary predicates IS [NOT] NULL
// and IS [NOT] UNKNOWN, as well as binary predicates involving the
// operators {>,>=,<,<=,=,<>}, where one of the operands is a literal
// constant.
//
// NOTE: Support for nonVEG <col A> <eqOp> <col B> is provided in this
// routine; ColStatDesc::modifyStats takes effect only for inequality
// predicates involving constants. Support of <col A> <ineqOp> <col B>
// will (eventually) need to be added.
//
// NOTE: The code is written as if the same predicate can apply to more
// than one ColStatDesc in the given ColStatDescList. This may never
// happen, but it doesn't hurt to code it this way.
//
// This is the new version of this function which facilitates applying all
// predicates at once.
// -----------------------------------------------------------------------
NABoolean
ColStatDescList::applyPred (ItemExpr *pred,
CostScalar & newRowcount,
CollIndex & numOuterColStats,
MergeType mergeMethod,
OperatorTypeEnum exprOpCode,
CostScalar *maxSelectivity)
{
// sanity check: operator understood by synthesis?
if ( NOT pred->synthSupportedOp() ) return FALSE;
CostScalar rowCountBeforePreds = newRowcount;
CostScalar tempRowcount = csZero;
CostScalar newUec;
CollIndex i;
NABoolean appliedPredicateFlag = FALSE;
NABoolean negate = FALSE;
// Check to see if the predicate is derived from a LIKE predicate.
// If it is, then it can only be either >= or <.
NABoolean derivedFromLike = FALSE;
if ( (pred->getOperatorType() == ITM_GREATER_EQ) OR
(pred->getOperatorType() == ITM_LESS) )
{
BiRelat *br = (BiRelat *) pred;
derivedFromLike = br->derivativeOfLike();
}
// ---------------------------------------------------------------------
// first check for ITM_IS_NULL, ITM_IS_NOT_NULL, ITM_IS_UNKNOWN, and
// ITM_IS_NOT_UNKNOWN, or the simpler case where the predicate is not
// a column-to-column equality (i.e., it is not an equality predicate,
// or at least one of its operands is a constant).
// ---------------------------------------------------------------------
if ( ( pred->getOperatorType() == ITM_IS_NULL
OR pred->getOperatorType() == ITM_IS_NOT_NULL
OR pred->getOperatorType() == ITM_IS_UNKNOWN
OR pred->getOperatorType() == ITM_IS_NOT_UNKNOWN
)
OR NOT ( pred->getOperatorType() == ITM_EQUAL
AND pred->child(0)->castToConstValue( negate ) == NULL
AND pred->child(1)->castToConstValue( negate ) == NULL
)
OR (pred->equatesToAConstant())
)
{
CostScalar oldRowcountForSelAdj;
// Simple Case: look for <op> <col>,
// <col> <op> <constant> or
// <constant> <op> <col> predicates
for ( i = numOuterColStats /*fix to ALM5128*/; i < entries(); i++ )
{
ColStatDescSharedPtr thisColStatDesc = (*this)[i];
ColStatsSharedPtr colStats = thisColStatDesc->getColStats();
if ((colStats->isVirtualColForHist() ||
colStats->isMCforHbasePartitioning ())) // ignore MC stats intervals for now, only used by the splitting logic
continue;
if(pred->isSelectivitySetUsingHint())
oldRowcountForSelAdj = colStats->getRowcount();
CostScalar totalUecBeforePreds = colStats->getTotalUec();
CostScalar baseUecBeforePreds = colStats->getBaseUec();
CostScalar predMaxSel = 1.0;
if ( thisColStatDesc->modifyStats
( pred, tempRowcount, maxSelectivity ? &predMaxSel : NULL) )
{
appliedPredicateFlag = TRUE;
if(derivedFromLike)
{
// for LIKE predicates, maxSelectivity will stay 1.0
// This range predicate is a derivative of Like predicate.
// Hence we cannot use the usual selectivity obtained after
// applying range predicates. Here we use a portion of the
// default selectivity for like predicates to obtain the
// resultant rowcount. We still continue to use the histograms
// obtained after applying range predicates the usual way
// The selectivity would be applied to the initial rowcount
// before any predicates were applied.
if (maxSelectivity == NULL) {
BiRelat *br = (BiRelat *) pred;
br->adjustRowcountAndUecForLike((*this)[i],
rowCountBeforePreds,
totalUecBeforePreds,
baseUecBeforePreds);
} // maxSelectivity == NULL
}
else // !derivedFromLike
{
// modifyStats has just computed selectivity(p) by side effecting
// the histogram. For cases where maxSelectivity(p)=selectivity(p)
// we want to reflect that in maxSelectivity.
if (pred->maxSelectivitySameAsSelectivity() && maxSelectivity
&& newRowcount > csZero)
{
predMaxSel = MINOF(predMaxSel, tempRowcount/newRowcount);
*maxSelectivity = MINOF(predMaxSel, *maxSelectivity);
}
}
// If user specified selectivity for this predicate, we need to make
// adjustment in reduction to reflect that.
thisColStatDesc->applySelIfSpecifiedViaHint(pred, oldRowcountForSelAdj);
}
}
// $$$ WE NO LONGER WANT TO NORMALIZE HISTOGRAMS AFTER EVERY
// $$$ PREDICATE!
//
// Instead, we will normalize all of the histograms' rowcounts after
// we have applied all of the predicates.
return appliedPredicateFlag;
} // simple case
// maxSelectivity computation is done
if (maxSelectivity) return appliedPredicateFlag;
// ---------------------------------------------------------------------
// Complex Case: The input is an Equality Predicate not of the form
// <col> = <constant>.
// Equality predicates can only be performed amongst:
//
// ITM_INSTANTIATE_NULL
// ITM_VALUEIDUNION
// ITM_VEG_REFERENCE
// ITM_UNPACKCOL
//
// So, look for that case. Actions performed depend upon the type of the
// join, and the positions of the usable entries in the ColStatDescList.
//
// But before checking anything further, make sure that this predicate
// is not a no-op of the form <expr_1> = <expr_1>.
// ---------------------------------------------------------------------
ItemExpr * lhs = pred->child(0);
ItemExpr * rhs = pred->child(1);
OperatorTypeEnum predExprType = pred->getOperatorType();
if ( (*lhs) == (*rhs) )
return TRUE; // indicate that this no-op predicate was applied
if (predExprType == ITM_EQUAL)
{
lhs = lhs->getLeafValueIfUseStats();
rhs = rhs->getLeafValueIfUseStats();
}
OperatorTypeEnum leftExprType = lhs->getOperatorType();
OperatorTypeEnum rightExprType = rhs->getOperatorType();
// Special handling for predicates of the following form:
// T1.A = max(T2.B);
//
// If there were no local predicates on T2 prior to join,
// then we can replace max(T2.B) with the histogram maximum
// boundary value of T2.B and apply that predicate instead.
// This is because boundary values are no longer reliable if
// any local predicates have been applied to the same table.
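// Illustrative example (hypothetical query): for
//   SELECT * FROM T1 WHERE T1.A = (SELECT MAX(B) FROM T2);
// if T2 carries no local predicates, max(T2.B) is replaced below by the
// histogram's maximum boundary value for T2.B, and the predicate is then
// applied as if it were T1.A = <that boundary value>.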
if ((CmpCommon::getDefault(HIST_MIN_MAX_OPTIMIZATION) == DF_ON) &&
(predExprType == ITM_EQUAL) &&
(leftExprType == ITM_VEG_REFERENCE) &&
(rightExprType == ITM_MIN || rightExprType == ITM_MAX))
{
// Sometimes, aggregate is transformed to max(max(col)),
// need to do the following to extract column.
while(rhs->getOperatorType() == ITM_MIN || rhs->getOperatorType() == ITM_MAX)
rhs = rhs->child(0);
CollIndex i = 0;
// check that the right hand histogram exists
if ( getColStatDescIndexForColumn(i, rhs->getValueId()) )
{
ColStatDescSharedPtr rhsStatDesc = (*this)[i];
ColStatsSharedPtr rhsColStats = rhsStatDesc->getColStats();
ValueId colId = rhsStatDesc->getColumn();
BaseColumn * colExpr = colId.castToBaseColumn();
// Need to make sure there is no local preds on the right hand side
// (where aggregate exists).
if( colExpr && colExpr->getTableDesc()->getLocalPreds().entries() == 0)
{
// check that the left hand histogram exists
if (getColStatDescIndexForColumn(i, lhs->getValueId()))
{
ColStatDescSharedPtr lhsStatDesc = (*this)[i];
ColStatsSharedPtr currLhsColStats = lhsStatDesc->getColStats();
CostScalar currLhsScaleFactor = currLhsColStats->getScaleFactor();
CostScalar currLhsRC = (currLhsColStats->getRowcount()/currLhsScaleFactor);
// Need to get original full histogram
const NAColumnArray &columnList = currLhsColStats->getStatColumns();
NATable * lhsTable = ((NATable *)columnList[0]->getNATable());
StatsList &lhsStats = lhsTable->getStatistics();
Lng32 colPosition = columnList[0]->getPosition();
ColStatsSharedPtr origLhsColStats = lhsStats.getSingleColumnColStats(colPosition);
// Make a deep copy
ColStatsSharedPtr copyOfOrigLeftColStats =
ColStats::deepCopy(*origLhsColStats, HISTHEAP);
CostScalar origLhsRC = origLhsColStats->getRowcount();
//Apply the reduction to the orig histogram if any from local preds
CostScalar localPredReduction = currLhsRC/origLhsRC;
if(localPredReduction != csOne)
copyOfOrigLeftColStats->scaleHistogram(localPredReduction);
// Before we can perform the optimization, we need to make sure we have
// a good hash value to use to search via MIN/MAX value for an interval.
// See ColStats::setToSingleValue(const EncodedValue&, ConstValue*, ...)
// in the block of code where the constant is among the most frequent values (MFV).
// Here, since it is not easy to construct a ConstValue object out of the MIN/MAX
// value of EncodedValue, we restrict ourselves to the case where the MIN/MAX value
// equals one of the frequent values from the rhs.
const FrequentValueList& fvList = rhsColStats->getFrequentValues();
if ( fvList.entries() > 0 ) {
EncodedValue normValue;
FrequentValue fv;
if(rightExprType == ITM_MIN) {
normValue = rhsColStats->getMinValue();
fv = fvList[0];
} else {
normValue = rhsColStats->getMaxValue();
fv = fvList[fvList.entries()-1];
}
if ( fv.getEncodedValue() == normValue ) {
// Reduce to a single interval via a key. The key value is
// normValue, with the hash stored in fv. The interface is a
// little awkward, but we will live with it for now.
copyOfOrigLeftColStats->setToSingleValue(normValue, NULL, NULL, &fv);
// Need to scale it back by original scaling factor
copyOfOrigLeftColStats->scaleHistogram(currLhsScaleFactor);
// Finally overlay the original histogram onto existing ColStatDesc
lhsStatDesc->setColStats(copyOfOrigLeftColStats);
return TRUE;
}
}
}
}
}
}
// For 'Sample', a NotCovered node is inserted over the expression in order
// to prevent it from being pushed down. This should not have any effect on
// the selectivity, so we remove any NotCovered from the expressions and
// estimate cardinality like a regular predicate.
NABoolean notCovered = FALSE;
if (leftExprType == ITM_NOTCOVERED)
{
lhs = lhs->child(0);
if (predExprType == ITM_EQUAL)
notCovered = TRUE;
}
if (rightExprType == ITM_NOTCOVERED)
{
rhs = rhs->child(0);
if (predExprType == ITM_EQUAL)
notCovered = TRUE;
}
if (CmpCommon::getDefault(COMP_BOOL_48) == DF_ON)
{
if ( (rightExprType == ITM_NATYPE ) AND
(leftExprType == ITM_VEG_REFERENCE ) )
notCovered = TRUE;
}
if (!notCovered)
{
if ( (rightExprType == ITM_ROWSETARRAY_SCAN ) OR
(leftExprType == ITM_ROWSETARRAY_SCAN ) )
{
// rowset array scans are allowed through to the merge logic below
}
else
{
if ( NOT ( ( leftExprType == rightExprType )
AND ( leftExprType == ITM_INSTANTIATE_NULL
OR leftExprType == ITM_VALUEIDUNION
OR leftExprType == ITM_VEG_REFERENCE
OR leftExprType == ITM_UNPACKCOL
)
)
)
{
return FALSE;
}
}
}
// ---------------------------------------------------------------------
// IF the referenced ColStatDesc's are provided, this equality predicate
// *can* be performed.
//
// Note that Left and Right are relative to the operands of the equality
// predicate. Later they need to be oriented to the inner/outer sides
// of the input ColStatDescList.
// ---------------------------------------------------------------------
CollIndexList leftStatsToMerge(STMTHEAP);
CollIndexList rightStatsToMerge(STMTHEAP);
identifyMergeCandidates( lhs, leftStatsToMerge );
if ( leftStatsToMerge.entries() == 0 )
return FALSE;
identifyMergeCandidates( rhs, rightStatsToMerge );
if ( rightStatsToMerge.entries() == 0 )
return FALSE;
// Take care of the possibility that one, or more, of the given column
// references referenced more than one column in the ColStatDescList.
// If this happens : perform an equijoin amongst the multiple columns.
CollIndex leftRootIndex = leftStatsToMerge[0];
ColStatDescSharedPtr leftRootDesc = (*this)[leftRootIndex];
CollIndex rightRootIndex = rightStatsToMerge[0];
ColStatDescSharedPtr rightRootDesc = (*this)[rightRootIndex];
CollIndex rootIndex;
ColStatDescSharedPtr rootStatDesc;
if ( leftRootIndex < rightRootIndex )
{
rootIndex = leftRootIndex;
rootStatDesc = leftRootDesc;
}
else
{
rootIndex = rightRootIndex;
rootStatDesc = rightRootDesc;
}
ColStatsSharedPtr rootColStats = rootStatDesc->getColStatsToModify();
ColStatsSharedPtr leftColStats = leftRootDesc->getColStatsToModify();
CostScalar leftRowcount = leftColStats->getRowcount();
ColStatsSharedPtr rightColStats = rightRootDesc->getColStatsToModify();
CostScalar rightRowcount = rightColStats->getRowcount();
// Finally, given a 'root' ColStatDesc,
// determine whether or not we have applied this predicate
const ValueId & predValueId = pred->getValueId();
if ( rootStatDesc->isPredicateApplied( predValueId ) || // Already applied?
leftRootIndex == rightRootIndex ) // or no-op?
return TRUE;
rootStatDesc->addToAppliedPreds( predValueId );
CostScalar saveRowcount = rootColStats->getRowcount();
CostScalar saveUec = rootColStats->getTotalUec();
// Under an OR we do not want to handle VEG references because:
// 1. For predicates a=b AND (c=b OR c=d), if we perform a=b while
//    computing c=b, we will end up applying the same predicate twice,
//    since a=b gets applied later as well.
// 2. Applying a VEG reference under an OR gives rise to unexpected
//    ColStats when we unpack the histograms to get all the merged
//    ColStats back.
if( exprOpCode != ITM_OR )
{
mergeSpecifiedStatDescs (
leftStatsToMerge, leftRootIndex,
mergeMethod, numOuterColStats,
saveRowcount, saveUec,
FALSE,
exprOpCode); // not for VEGPred
// repeat for the right equivalence group.
mergeSpecifiedStatDescs (
rightStatsToMerge, rightRootIndex,
mergeMethod, numOuterColStats,
saveRowcount, saveUec,
FALSE,
exprOpCode); // not for VEGPred
}
// Gen Sol:10-090728-3382:
// In some cases, the same column participates in more than one join
// predicate. When subsequent predicates (second onwards) are applied,
// the effect of the first predicate application is lost. We capture
// the existing reduction here before additional predicates are applied.
CostScalar leftReduction = csOne, rightReduction = csOne, scaleFactor = csMinusOne;
NABoolean isASemiJoin = ((mergeMethod == SEMI_JOIN_MERGE) || (mergeMethod == ANTI_SEMI_JOIN_MERGE));
if(!isASemiJoin && leftRowcount < rowCountBeforePreds)
{
// Need to set the original scale factor before the prior predicates were applied
if(leftRootDesc->getVEGColumn() == lhs->getValueId())
scaleFactor = rowCountBeforePreds/leftColStats->getBaseRowCount();
else
{
CollIndex nonVegEqualEntries = leftRootDesc->nonVegEquals().entries();
for (i = 0; i < nonVegEqualEntries; i++)
{
ColStatDescSharedPtr tmpDesc = leftRootDesc->nonVegEquals()[i];
if(tmpDesc->getVEGColumn() == lhs->getValueId())
{
scaleFactor = rowCountBeforePreds / tmpDesc->getColStats()->getBaseRowCount();
break;
}
}
}
if(scaleFactor != csMinusOne)
{
leftReduction = leftRowcount / rowCountBeforePreds;
leftColStats->scaleHistogram(csOne/leftReduction);
leftColStats->setScaleFactor(scaleFactor);
}
}
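// Worked example (illustrative values): with rowCountBeforePreds = 1000
// and leftRowcount = 200, leftReduction = 0.2; the histogram is scaled
// by 1/0.2 = 5 to undo the earlier reduction, and the reduction is
// re-applied to the merge result further below via scaleHistogram(leftReduction).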
scaleFactor = csMinusOne;
if(!isASemiJoin && rightRowcount < rowCountBeforePreds)
{
// Need to set the original scale factor before the prior predicates were applied
if(rightRootDesc->getVEGColumn() == rhs->getValueId())
scaleFactor = rowCountBeforePreds/rightColStats->getBaseRowCount();
else
{
CollIndex nonVegEqualEntries = rightRootDesc->nonVegEquals().entries();
for (i = 0; i < nonVegEqualEntries; i++)
{
ColStatDescSharedPtr tmpDesc = rightRootDesc->nonVegEquals()[i];
if(tmpDesc->getVEGColumn() == rhs->getValueId())
{
scaleFactor = rowCountBeforePreds / tmpDesc->getColStats()->getBaseRowCount();
break;
}
}
}
if(scaleFactor != csMinusOne)
{
rightReduction = rightRowcount / rowCountBeforePreds;
rightColStats->scaleHistogram(csOne/rightReduction);
rightColStats->setScaleFactor(scaleFactor);
}
}
// --------------------------------------------------------------------
// Now, safely past that unlikely situation, perform the join between
// the left and right sides of the equality predicate.
// For merging columns from the same table,
// selectivity is HIST_NO_STATS_UEC.
// --------------------------------------------------------------------
MergeType localMergeMethod = mergeMethod;
NABoolean joinFromSameTable = FALSE;
if ( leftRootIndex < rightRootIndex )
{
ColStatDescSharedPtr tmpDesc (new (HISTHEAP)
ColStatDesc( *rightRootDesc ), HISTHEAP);
tmpDesc->setFromInnerTable( rightRootIndex >= numOuterColStats ?
TRUE : FALSE );
if ( NOT ( leftRootIndex < numOuterColStats &&
rightRootIndex >= numOuterColStats ) )
{
// Even though the mergeMethod may never be used, we initialize
// it here for cases when COMP_BOOL_74 is OFF.
localMergeMethod = INNER_JOIN_MERGE;
joinFromSameTable = leftRootDesc->mergeColStatDescOfSameTable(tmpDesc, exprOpCode);
}
if (!joinFromSameTable)
{
leftRootDesc->mergeColStatDesc(
tmpDesc,
localMergeMethod,
FALSE, // don't force merge
exprOpCode
);
}
leftRootDesc->nonVegEquals().insert( tmpDesc );
}
else
{
ColStatDescSharedPtr tmpDesc(new (HISTHEAP)
ColStatDesc( *leftRootDesc ), HISTHEAP);
tmpDesc->setFromInnerTable( leftRootIndex >= numOuterColStats ?
TRUE : FALSE );
if ( NOT ( rightRootIndex < numOuterColStats &&
leftRootIndex >= numOuterColStats ) )
{
// Even though the mergeMethod may never be used, we initialize
// it here for cases when COMP_BOOL_74 is OFF.
localMergeMethod = INNER_JOIN_MERGE;
joinFromSameTable = rightRootDesc->mergeColStatDescOfSameTable(tmpDesc, exprOpCode);
}
if(!joinFromSameTable)
{
rightRootDesc->mergeColStatDesc(
tmpDesc,
localMergeMethod,
FALSE, // don't force merge
exprOpCode
);
}
rightRootDesc->nonVegEquals().insert( tmpDesc );
}
if(leftReduction != csOne)
rootColStats->scaleHistogram(leftReduction);
if(rightReduction != csOne)
rootColStats->scaleHistogram(rightReduction);
// get the aggregate results following the latest merge
newRowcount = rootColStats->getRowcount();
newUec = rootColStats->getTotalUec();
// If the user specified a selectivity for this predicate, we need to
// adjust the reduction to reflect that.
rootStatDesc->applySelIfSpecifiedViaHint(pred, rowCountBeforePreds);
// --------------------------------------------------------------------
// For histograms that belong to the same equivalence class:
// IF this is NOT an Outer Join keep only the root
// IF this is an Outer Join, the original histograms need to be
// replaced by copies of the root; they will be null-augmented.
//
// This process is complicated by our prior separation of left-right.
// --------------------------------------------------------------------
// If under an OR we did not merge, so we should not remove anything.
CollIndex rightCount = 0;
CollIndex leftCount = 0;
if ( exprOpCode != ITM_OR )
{
rightCount = rightStatsToMerge.entries();
leftCount = leftStatsToMerge.entries();
}
CollIndex nextToMerge;
// Walk the list backwards to avoid invalidating entries in the
// statsToMerge sets
while ( rightCount > 0 || leftCount > 0 )
{
if ( rightCount > 0 && leftCount > 0 )
{
if ( rightStatsToMerge[rightCount-1] > leftStatsToMerge[leftCount-1] )
{
nextToMerge = rightStatsToMerge[rightCount-1];
rightCount--;
}
else
{
nextToMerge = leftStatsToMerge[leftCount-1];
leftCount--;
}
}
else if ( rightCount > 0 )
{
nextToMerge = rightStatsToMerge[rightCount-1];
rightCount--;
}
else // leftCount > 0
{
nextToMerge = leftStatsToMerge[leftCount-1];
leftCount--;
}
if ( nextToMerge != rootIndex )
{
if ( mergeMethod == OUTER_JOIN_MERGE &&
nextToMerge >= numOuterColStats )
{
ColStatsSharedPtr updateColStats =
(*this)[nextToMerge]->getColStatsToModify();
updateColStats->overwrite( *rootColStats );
// Avoid having duplicate entries in the nonVegEquals_ of
// the root and the updated ColStatDesc.
if ( (*this)[nextToMerge]->nonVegEquals().entries() > 0 )
{
(*this)[nextToMerge]->nonVegEquals().clear();
}
}
else
{
removeAt( nextToMerge );
// maintain the numOuterColStats value when removing
// column statistics from the outer table's list.
if ( nextToMerge < numOuterColStats )
numOuterColStats--;
}
}
} // while
// $$$ WE NO LONGER WANT TO NORMALIZE HISTOGRAMS AFTER EVERY
// $$$ PREDICATE!
//
// Instead, we will normalize all of the histograms' rowcounts after
// we have applied all of the predicates.
return TRUE;
} // ColStatDescList::applyPred
// -----------------------------------------------------------------------
// ColStatDescList::applyDefaultPred
// A CSDL::estimateCardinality() subroutine
//
// This routine deals with all those predicates the two earlier routines
// don't know how to deal with.
//
// NOTE: It is presumed that this routine is only called with predicates
// for which a default selectivity should be applied.
//
// NOTE: as elsewhere, it is assumed that if a predicate can be pushed
// down to one instance of a column's histogram it has been pushed down to
// all 'identical' instances of that column's histograms.
//
// I.e., if default selectivity was applied to 1 instance of some
// histogram it has been applied to all same-group instances of that
// histogram to which it can be applied.
//
// NOTE: One oddity of this routine is that if the histogram impacted by a
// default predicate is 'real', default selectivity is applied once for
// each distinct predicate. However, if the histogram is 'fake', default
// selectivity is only applied once for each Type of predicate.
//
// FUTURE WORK: Eventually, it ought to be possible to associate different
// default selectivity with different joins. Right now, all default joins
// apply the same default selectivity.
//
// This is the new version of this function which facilitates applying all
// predicates at once.
// -----------------------------------------------------------------------
// $$$ difference between this routine and the original :
//
// 1. When the predicate in question has not been previously applied, only
// apply it to the histogram it affects, and then DO NOT normalize all
// histograms to have that resulting rowcount
//
// 2. In the case of a global predicate (one that does not apply to any
// particular histogram), we use the 'globalPredicateReduction' out
// parameter to communicate with the calling routine that we need to apply
// a global reduction to all histograms --> but LATER
void
ColStatDescList::applyDefaultPred (ItemExpr *pred,
CostScalar & globalPredicateReduction,
OperatorTypeEnum exprOpCode,
CostScalar *maxSelectivity)
{
// -------------------------------------------------------------------
// If any operand of this default predicate is an ITM_HOSTVAR *and* if
// that ITM_HOSTVAR isSystemGenerated(), or it is a selectivity param,
// then this predicate is to be treated (for purposes of estimating
// selectivity) as a no-op.
//
// Note Selectivity Params are processed in
// ColStatDescList::applyVEGPred() and ColStatDescList::applyPred().
//
// In ColStatDescList::applyPred(), predicates containing selectivity
// params are identified through the virtual method
// SelParameter::castToConstValue().
// -------------------------------------------------------------------
for ( Int32 arity = 0; arity < pred->getArity(); arity++ )
{
const ItemExpr * operand = (*pred)[arity].getPtr();
if ( ( (operand->getOperatorType() == ITM_HOSTVAR) &&
((HostVar *) operand)->isSystemGenerated() ) ||
(
operand->getOperatorType() == ITM_CACHE_PARAM &&
operand->castToSelParameter()
)
)
{
return; // no further processing needed.
}
}
const ValueId & origPredValueId = pred->getValueId();
CostScalar defaultSel = csOne; // starting assumptions
NABoolean alreadyApplied = FALSE;
NABoolean globalPredicate = TRUE;
NABoolean statsExist = FALSE;
OperatorTypeEnum op = pred->getOperatorType();
if (op == ITM_ONE_ROW || op == ITM_ONE_TRUE || op == ITM_ANY_TRUE)
{
pred = pred->child(0);
op = pred->getOperatorType();
}
NABoolean isAFalsePred = FALSE;
if(op == ITM_CONSTANT)
{
const ConstValue * constPred = (ConstValue *)pred->castToItemExpr();
NAString constPredStr(constPred->getRawText()->data());
constPredStr = constPredStr.strip(NAString::both);
if(constPredStr == "0.")
isAFalsePred = TRUE;
}
// fix soln 10-080605-3680: maxSelectivity(false) should be 0
if (maxSelectivity && (op == ITM_RETURN_FALSE || isAFalsePred))
{
*maxSelectivity = MINOF(csZero, *maxSelectivity);
}
if (CmpCommon::getDefault(COMP_BOOL_107) == DF_ON)
{
if ( pred->synthSupportedOp() )
alreadyApplied = pred->applyDefaultPred
(*this, exprOpCode, origPredValueId, globalPredicate,
maxSelectivity);
else
{
// no need to pass maxSelectivity here because
// maxSel(unsupported default predicate) == 1.0
alreadyApplied = pred->applyUnSuppDefaultPred
(*this, origPredValueId, globalPredicate);
}
}
else
{
//
// CASE 1 : predicate supports synthesis
//
if ( pred->synthSupportedOp() ) // does the operator support synthesis??
{
// get the histogram on which the predicate is to be applied
ValueId column;
// leftColIndex contains the position of the left histogram whose statistics
// will be used for computing selectivity.
// In case the left child contains more than one column,
// it is the position of the histogram with max UEC amongst the left child's columns.
CollIndex leftColIndex;
// following two sets contain the leaf values of the respective children
ValueIdSet leftLeafValues;
ValueIdSet rightLeafValues;
// This boolean will be set to TRUE if it is an equality predicate with more
// than one column involved and COMP_BOOL_40 is ON.
// When counting the number of columns, we take the combined set of the
// columns from the left and the right children.
NABoolean equiJoinWithExpr = FALSE;
if (op == ITM_VEG_PREDICATE)
{
// could be a VEG predicate with no children
CollIndexList statsToMerge(STMTHEAP);
// locate entries in this ColStatDescList that are associated
// with the current VEG predicate.
statsExist =
identifyMergeCandidates( pred, leftColIndex, statsToMerge );
}
else
{
if ( (op == ITM_EQUAL) &&
(CmpCommon::getDefault(COMP_BOOL_40) == DF_ON))
{
// if COMP_BOOL_40 == ON: for equality expressions involving
// more than one column, we pick the column with max UEC
ItemExpr *rightChild = pred->child(1);
ItemExpr *leftChild = pred->child(0);
rightChild->findAll(ITM_VEG_REFERENCE, rightLeafValues, TRUE, TRUE);
leftChild->findAll(ITM_VEG_REFERENCE, leftLeafValues, TRUE, TRUE);
// get leftColIndex for column with Max UEC. In case of predicates
// like a + 4 = 10, it will be the index for col 'a'
if ( ((rightLeafValues.entries() + leftLeafValues.entries()) > 1) &&
(rightLeafValues != leftLeafValues))
{
statsExist = getColStatDescIndexForColWithMaxUec(leftColIndex, leftLeafValues);
// If more than one column is involved in the expression, consider the
// histogram with max UEC to compute selectivities. This includes cases
// where the left child has two columns while the right child is a constant.
// Example: (col1 + col2) = 10; col1 = col2 + col3; Fn(col1) = Fn(col2).
// For predicates like col1 = 10 or col1 + 4 = 10,
// use max frequency as the selectivity.
if ( (statsExist) &&
( (exprOpCode != REL_SCAN) ||
(CmpCommon::getDefault(COMP_BOOL_74) == DF_OFF) ) )
equiJoinWithExpr = TRUE;
} // equi-join
else
{
// Is a local predicate with column only on the left child
leftChild = leftChild->getLeafValueIfUseStats();
leftLeafValues.clear();
leftChild->findAll(ITM_BASECOLUMN, leftLeafValues, TRUE, TRUE);
column = leftChild->getValueId();
if (getColStatDescIndexForColumn(leftColIndex, column) )
statsExist = TRUE;
}
} // equal predicate and comp_bool_40 == DF_ON
else
{
// for all other predicates with arity > 0
if (pred->getArity() > 0)
{
ItemExpr * leftChild = pred->child(0);
leftChild = leftChild->getLeafValueIfUseStats();
column = leftChild->getValueId();
leftChild->findAll(ITM_BASECOLUMN, leftLeafValues, TRUE, TRUE);
if (getColStatDescIndexForColumn(leftColIndex, column) )
statsExist = TRUE;
} // end if predArity > 0
} // if non-equality-predicates or COMP_BOOL_40 == OFF
} // !ITM_VEG_PREDICATE
if (statsExist)
{
// Get the first column for which the histogram is found.
// Multi-column histograms are no longer running around the system, but
// there are still predicates that reach here
//
// e.g., predicates involving aggregates (count(*) = 10)
// e.g., predicates on host variables (?p = 10)
//
// ... probably many more
// CASE 1a : VEG pred
if ( op == ITM_VEG_PREDICATE ) // Transitive closure predicate
{
// ----------------------------------------------------------
// This logic replicates a portion of the logic used in
// ColStatDescList::applyVEGPred to determine the parts of this
// VEGPred for which 'real' histogram manipulation was NOT done.
//
// Default selectivity should be applied *once* for *each* valid
// join that couldn't be executed because of its multi-column
// nature, but which would have otherwise been a valid join.
// (So long as an involved column hasn't already appeared in a
// default EQ-join.)
// ----------------------------------------------------------
// $$$ FCS_ONLY kludge -- this code should be reviewed and
// $$$ reconsidered post-FCS
//
// We reach this point when we're applying a predicate that looks
// like <col> = hostvar.
//
// We may reach this point in other instances, too. For now, we're
// not going to worry about that.
//
// For this situation, we set the rowcount to be rc/uec, and set
// uec to be 1. This reduction is exactly what you'd expect
// when applying an eqpred with a hostvar.
//
ColStatDescSharedPtr rootStatDesc = (*this)[leftColIndex];
ColStatsSharedPtr rootColStats = rootStatDesc->getColStatsToModify();
CostScalar oldUec = rootColStats->getTotalUec();
CostScalar oldRows = rootColStats->getRowcount();
CostScalar newRows = oldRows / oldUec;
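// Worked example (illustrative values): oldRows = 10,000 and oldUec = 50
// give newRows = 10,000 / 50 = 200, with uec set to 1 by the
// synchronizeStats(..., SET_UEC_TO_ONE) call below.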
// compute maxSelectivity before histograms get modified
if (maxSelectivity)
{
VEG* veg = ((VEGPredicate*)pred)->getVEG();
const ValueIdSet & values = veg->getAllValues();
// we must distinguish between "X=?" and "X=Y".
ItemExpr *cExpr = NULL;
if (!values.referencesAConstExpr(&cExpr))
{ // veg is an "X=Y" predicate
// maxSelectivity("X=Y") == 1.0
}
else // veg is an "X=?" predicate
{
// maxFreq = maxFrequency(X) for VEGPred "X=?"
// NB: "maxFreq = histograms.getMaxFreq(v);" may look
// attractive here. But, beware: it causes many maxCard
// tests to fail in compGeneral/test015 -- they get 0.
CostScalar maxFreq = csMinusOne;
for (ValueId v = values.init();
values.next(v);
values.advance(v))
{
if (v.getItemExpr()->getOperatorType() ==
ITM_BASECOLUMN)
{
maxFreq = getMaxFreq(v);
if (maxFreq > csMinusOne)
break;
}
}
// maxSelectivity("X=?") == max frequency(X) / total rows
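// e.g. (illustrative values): maxFreq = 400 over 10,000 rows gives
// maxSelectivity <= 400 / 10,000 = 0.04.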
if (maxFreq > csMinusOne &&
rootColStats->getRowcount() > csZero)
*maxSelectivity =
MINOF(maxFreq / rootColStats->getRowcount(),
*maxSelectivity);
} // veg is an "X=?" predicate
} // maxSelectivity != NULL
rootStatDesc->synchronizeStats(
oldRows,
newRows,
ColStatDesc::SET_UEC_TO_ONE
);
alreadyApplied = TRUE;
} // 1a: veg pred
// CASE 1b : equal, not equal
else if ( ( op == ITM_EQUAL ) || ( op == ITM_NOT_EQUAL ) )
{
ColStatDescSharedPtr leftStatDesc = (*this)[leftColIndex];
if (equiJoinWithExpr)
{
// Control comes here for equality predicates with more
// than one column involved. The columns could be from
// both children or from only left child
globalPredicate = FALSE; // not a global predicate
if (NOT(leftStatDesc->isPredicateApplied(origPredValueId) ) )
{
CostScalar fudgeFactorForAggFn ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_6));
// rightLeafValues > 0 and leftLeafValues > 0, or <col1> = <col2>,
// col1 = Fn(col2, col3), or Fn(col1, col2) = col3:
// selectivity is equal to 1/maxUEC of the columns
// participating in the query.
// get histograms from right child. In case there are
// more than one columns in the right child, get the histogram
// with max UEC
ColStatsSharedPtr rightColStat;
CollIndex rightColIndex;
ColStatsSharedPtr leftColStat = leftStatDesc->getColStats();
NABoolean leftColStatReal = !leftColStat->isOrigFakeHist();
NABoolean rightColStatReal = FALSE;
// UECs of the left and right children
CostScalar rightUec = csMinusOne;
CostScalar leftUec = leftColStat->getTotalUec();
// If we reached here, it is guaranteed that we have histograms for the
// left child. But we cannot say anything for sure about the right child.
// Hence check if there is any column on the right side of the equality
// predicate.
// If the right side has any column, get the histograms for the right child.
if ((rightLeafValues.entries() > 0) &&
(getColStatDescIndexForColWithMaxUec(rightColIndex, rightLeafValues)) )
{
// Out of all these columns, pick the one with max UEC. While doing
// that, it also makes sure that it compares default UEC to
// default UEC and actual UEC to actual UEC across both children.
rightColStat = (*this)[rightColIndex]->getColStats();
rightUec = rightColStat->getTotalUec();
rightColStatReal = !rightColStat->isOrigFakeHist();
} // if getColIndex for rightLeafValues
// Check for aggregate functions in both children
// and adjust UECs accordingly.
// Ideally, we would have liked to compute selectivity for
// aggregate functions by considering the UEC of only those
// columns which are children of aggregate functions.
// Because of time constraints, and not finding an inexpensive
// way to handle that, we take the cardinality of the column
// with max UEC from the child and, in case of an aggregate,
// multiply it by a fudge factor.
// Hence there could be cases where, for predicates like
// "a + max(b) = c", we pick up col 'a' as the one with the
// highest UEC and multiply its cardinality by the fudge
// factor. If columns 'a' and 'b' belong to the same table,
// or have already been joined, it does not matter which
// column we pick up, but in some cases this may give poor estimates.
if (pred->child(0)->containsAnAggregate())
{
leftUec = (leftUec * fudgeFactorForAggFn).minCsOne();
}
if (rightColStat && pred->child(1)->containsAnAggregate())
{
rightUec = (rightUec * fudgeFactorForAggFn).minCsOne();
}
// To get the maxUec for selectivity, we don't want to compare
// real UEC with the fake one. Hence, also check for fakeness
// before comparing
if (leftColStatReal && !rightColStatReal)
defaultSel = csOne/leftUec;
else
if (rightColStatReal && !leftColStatReal)
defaultSel = csOne/rightUec;
else
defaultSel = csOne/MAXOF(leftUec, rightUec);
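// e.g. (illustrative values): two real histograms with leftUec = 100
// and rightUec = 250 give defaultSel = 1/250 = 0.004.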
if( (pred->child(0)->getOperatorType() == ITM_DAYOFWEEK) ||
(pred->child(1)->getOperatorType() == ITM_DAYOFWEEK) )
defaultSel = csOne/7;
// Since the histograms have not been merged, we don't know which
// one will finally be picked up later for the parent node, based on
// the characteristic outputs. Hence set the aggregate information
// of all of them correctly. Rowcount and UEC for all will be handled
// automatically during synchronizeStats; appliedPreds is the only
// attribute that needs to be set correctly here.
ValueIdSet columnWithPreds(leftLeafValues);
columnWithPreds.insert(rightLeafValues);
addToAppliedPredsOfAllCSDs( columnWithPreds, origPredValueId );
// This will modify the rowcount, which should be done for only
// one histogram. Remaining histograms will be synchronized later
leftStatDesc->applySel( defaultSel );
} // NOT(isPredicateApplied)
else
{
// predicate has already been applied
alreadyApplied = TRUE;
}
} // equiJoinWithExpr
else
{
// Go the usual way. Control comes here for equality predicates if
// COMP_BOOL_40 is OFF, or for cases where we have a local predicate
// with one column in the left child.
////////////////////////////////////////////////////////////////////
// do not need to worry about selectivity constant parameters here
// because this branch is for predicates with ORs: a=? OR b=?. Each
// predicate factor such as a=? is processed once by this function.
// Since OR predicate is cacheable but not parameterizable, it is
// impossible to have a selectivity constant parameter situation in
// the item expression tree. That is, there is no need to search
// for a selectivity constant parameter and to use its selectivity
// value instead.
////////////////////////////////////////////////////////////////////
if (leftLeafValues.entries() == 1)
{
// First Question: is the predicate of the form
// "<col> <op> <expression>" or "<expression> <op> <col>"?
//
// Earlier logic places stand-alone columns on the left, so look
// for a histogram associated with the left-hand valueId.
//
// Of course, the trick here is that the following works even if
// the left-hand ValueId isn't for a column.
globalPredicate = FALSE; // not a global predicate
if ( NOT ( leftStatDesc->isPredicateApplied( origPredValueId ) ) )
{ // first time for this histogram
rightLeafValues.clear();
ItemExpr * rhs = pred->child(1);
rhs = rhs->getLeafValueIfUseStats();
rhs->findAll( ITM_BASECOLUMN, rightLeafValues, TRUE, TRUE );
// If there is only one column in the predicate, or there is one
// column on the right-hand side of the predicate and
// this column is the same as the left-side column, use the UEC
// of the column instead of using default statistics.
if (( rightLeafValues.isEmpty() ) ||
( (rightLeafValues.entries() == 1) && (leftLeafValues == rightLeafValues ) ) )
{
// <col> <op> <constant> or <col> <op> <col>,
// where the left and the right col are same
// If this is a 'fake' histogram, the selectivity
// assumed is the default selectivity associated
// with the current predicate. (But, don't apply
// the same type of predicate multiple times.)
if ( leftStatDesc->getColStats()->isFakeHistogram() )
{
defaultSel =
(leftStatDesc->isSimilarPredicateApplied( op ) ?
csOne : pred->defaultSel() );
}
else
{ // not a 'fake histogram'
// Determine the rowcount of the non-NULL value
// with the greatest/least rowcount.
ColStatsSharedPtr colStatsP = leftStatDesc->getColStats();
HistogramSharedPtr histP = colStatsP->getHistogram();
const CostScalar & rowRedF = colStatsP->getRedFactor();
const CostScalar & rowCountBeforePred = (colStatsP->getRowcount()).minCsOne();
CostScalar maxRowCount = csOne;
CostScalar minRowCount = rowCountBeforePred;
CostScalar tmpRowCount;
CostScalar uec;
Interval iter = histP->getFirstInterval();
while ( iter.isValid() && !iter.isNull() )
{
// uec must be at least 1 for these
// calculations since we don't want to get
// a huge blowup in rowcount
if ( iter.getUec().isZero() )
{
iter.next();
continue; // avoid divide-by-zero!
}
CostScalar iRows = rowRedF * iter.getRowcount();
uec = (MINOF(iRows, iter.getUec())).minCsOne();
tmpRowCount =
iRows / uec;
if ( tmpRowCount > maxRowCount )
maxRowCount = tmpRowCount;
if ( tmpRowCount < minRowCount )
minRowCount = tmpRowCount;
iter.next();
} // end while iter() is valid
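// e.g. (illustrative values): an interval holding 300 reduction-adjusted
// rows with uec 30 contributes tmpRowCount = 300 / 30 = 10 rows per
// distinct value; maxRowCount/minRowCount track the extremes over all intervals.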
if ( op == ITM_EQUAL )
{
defaultSel = maxRowCount / rowCountBeforePred;
if (maxSelectivity)
{
// maxSelectivity(x=?) == max frequency / total rows
*maxSelectivity =
MINOF(defaultSel,
*maxSelectivity);
} // maxSelectivity != NULL
}
else
{
// With ITM_NOT_EQUAL, and no Histogram, avoid
// setting defaultSel to zero:
CostScalar numer;
CostScalar denom;
if ( minRowCount == rowCountBeforePred )
{
numer = MAXOF( rowCountBeforePred, csTwo ) - csOne;
denom = MAXOF( rowCountBeforePred, csTwo );
defaultSel = numer / denom;
}
else
{
numer = minRowCount;
denom = rowCountBeforePred;
defaultSel = csOne - ( numer / denom );
// maxSelectivity(x<>?) == 1.0
// which means do nothing here because 1.0 has
// already been set as the default maxSelectivity
// just before the estimateCardinality() call.
}
} // op == ITM_NOT_EQUAL
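// e.g. (illustrative values): rowCountBeforePred = 1000 and
// minRowCount = 5 give defaultSel(<>) = 1 - 5/1000 = 0.995; in the
// degenerate case minRowCount == rowCountBeforePred, the selectivity
// becomes (1000 - 1)/1000 = 0.999.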
} // not a 'fake histogram'
} // The Operand is a constant expression.
else // i.e., NOT rightLeafValues.isEmpty()
{
// <col1> <op> <col2>, or <col1 + col2> <op> <col3>
// or <col1> <op> <col2 + col3>
// The operand involves more than one column, which
// makes this an equality join that we are not now
// able to evaluate.
// Note that the current predicate-based defaultSel
// routine is not used in this case.....
if ( op == ITM_EQUAL )
{
if (leftStatDesc->isSimilarPredicateApplied( op ) )
defaultSel = csOne;
else
{
if ( (rightLeafValues.entries() == 1) &&
(exprOpCode != REL_SCAN) )
{
// <col1> = <col2>
// We already know that the left side has one column. This is the
// case col1 join col2.
CollIndex rightColIndex;
if (getColStatDescIndexForColumn(rightColIndex, rhs->getValueId()) )
{
ColStatDescSharedPtr rightStatDesc = (*this)[rightColIndex];
ColStatsSharedPtr leftColStats = leftStatDesc->getColStats();
ColStatsSharedPtr rightColStats = rightStatDesc->getColStats();
CostScalar maxUec = (MAXOF(leftColStats->getTotalUec(),
rightColStats->getTotalUec())).minCsOne();
defaultSel = csOne/maxUec;
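// e.g. (illustrative values): leftUec = 50 and rightUec = 200 give
// defaultSel = 1/200 = 0.005 for the join predicate col1 = col2.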
} // colStat for column found
else
{
// histogram does not exist use default selectivity for Join equal
defaultSel = CostPrimitives::getBasicCostFactor( HIST_DEFAULT_SEL_FOR_JOIN_EQUAL );
}
} // rightLeafValueEntries = 1
else
{
// The right side has more than one column, or it is a scan.
// Use the default join-equal selectivity for <col1> = <col2, col3>
// and hist_no_stats_uec for a scan.
if (exprOpCode == REL_SCAN)
defaultSel = (1.0/CURRSTMT_OPTDEFAULTS->defNoStatsUec());
else
defaultSel = CostPrimitives::getBasicCostFactor( HIST_DEFAULT_SEL_FOR_JOIN_EQUAL );
}
}
}
else
{ // op == ITM_NOT_EQUAL. Apply default selectivity
if (leftStatDesc->isSimilarPredicateApplied( op ) )
defaultSel = csOne;
else
{
if (exprOpCode == REL_SCAN)
defaultSel = 1 - (1.0/CURRSTMT_OPTDEFAULTS->defNoStatsUec() );
else
defaultSel = (1 - CostPrimitives::getBasicCostFactor( HIST_DEFAULT_SEL_FOR_JOIN_EQUAL ) );
}
}
} // NOT rightLeafValues.isEmpty()
leftStatDesc->addToAppliedPreds( origPredValueId );
leftStatDesc->applySel( defaultSel );
}
else
{ // predicate has already been applied
alreadyApplied = TRUE;
}
} // column is leading prefix of histogram
} // 1b: equal, not equal
} // !equiJoinWithExpr
// CASE 1c : less, less_eq, greater, greater_eq
else if ( op == ITM_LESS
OR op == ITM_LESS_EQ
OR op == ITM_GREATER
OR op == ITM_GREATER_EQ
)
{
ItemExpr * rhs = pred->child(1);
rhs = rhs->getLeafValueIfUseStats();
// First Question: is the predicate of the form
// "<col> <op> <expression>" or "<expression> <op> <col>"?
globalPredicate = FALSE; // not a 'global' predicate
ColStatDescSharedPtr statDesc = (*this)[leftColIndex];
if ( NOT ( statDesc->isPredicateApplied( origPredValueId ) ) )
{ // first time for this histogram
defaultSel = statDesc->selForRelativeRange (op, column, rhs);
// defaultSel is one for range predicates on char or varchar
// column types.
// if a similar predicate has already been applied to this
// histogram, then we don't want to reduce the rowcount and
// uec further. Therefore we return the selectivity equal to
// one. Two range predicates are said to be similar if:
// 1. they are < and <=, or
// 2. they are > and >=, or
// 3. they are derived from a LIKE predicate.
rightLeafValues.clear();
rhs->findAll( ITM_BASECOLUMN, rightLeafValues, TRUE, TRUE );
// If there is only one column in the predicate, or there is one
// column on the right-hand side of the predicate and
// this column is the same as the left-side column, use the UEC
// of the column instead of using default statistics.
if (( rightLeafValues.isEmpty() ) ||
( (rightLeafValues.entries() == 1) && (leftLeafValues == rightLeafValues ) ) )
{
// pred: <col> operator <expression>
if ( defaultSel == csOne )
{
if (statDesc->derivOfLikeAndSimilarPredApp(pred) ||
statDesc->isSimilarPredicateApplied( op ) )
defaultSel = csOne;
else
{
BiRelat *br = (BiRelat *) pred;
if (br->derivativeOfLike())
defaultSel = br->getLikeSelectivity();
else
defaultSel = pred->defaultSel();
} // else statDesc->similarPredApplied
} // if defaultSel == csOne
} // if rightLeafValues is empty, or same column on both sides
else
{
// not a leaf value. Predicate is <col> operator <col>
defaultSel =
( statDesc->isSimilarPredicateApplied( op ) ?
csOne : CostPrimitives::getBasicCostFactor( HIST_DEFAULT_SEL_FOR_JOIN_RANGE ) );
} // end of join range
statDesc->addToAppliedPreds( origPredValueId );
statDesc->applySel( defaultSel );
}
else
{
alreadyApplied = TRUE;
}
} // 1c: less, less_eq, greater, greater_eq
// CASE 1d : null, not null, unknown, not unknown
else if ( op == ITM_IS_NULL
OR op == ITM_IS_NOT_NULL
OR op == ITM_IS_UNKNOWN
OR op == ITM_IS_NOT_UNKNOWN
)
{
globalPredicate = FALSE; // not a 'global' predicate
ColStatDescSharedPtr statDesc = (*this)[leftColIndex];
if ( NOT ( statDesc->isPredicateApplied( origPredValueId ) ) )
{ // first time for this histogram
defaultSel = ( statDesc->isSimilarPredicateApplied( op ) ?
csOne : pred->defaultSel() );
statDesc->addToAppliedPreds( origPredValueId );
statDesc->applySel( defaultSel );
}
else
{
alreadyApplied = TRUE;
}
} // 1d: null, not null, unknown, not unknown
// CASE 1e : or, and
else if ( ( op == ITM_OR) || ( op == ITM_AND ) )
{
// Don't do anything with this predicate right here, right now.
alreadyApplied = TRUE;
} // op is AND, or OR
// CASE 1f : should never occur!
else
{
DCMPASSERT( FALSE ); // unexpected condition!
alreadyApplied = TRUE;
}
} // if !alreadyApplied
} // if statsExist
//
// CASE 2 : predicate does not support synthesis
//
else
{
if ( pred->getArity() > 0 )
{
NABoolean isOpTypeNot = FALSE;
ItemExpr * tempPred;
OperatorTypeEnum tempOp;
ValueIdSet leftLeafValues;
ItemExpr * leftChild = pred->child(0);
if(pred->getOperatorType() == ITM_NOT)
{
isOpTypeNot = TRUE;
tempOp = leftChild->getOperatorType();
tempPred = leftChild;
leftChild = leftChild->child(0);
}
else
{
tempOp = op;
tempPred = pred;
}
leftChild = leftChild->getLeafValueIfUseStats();
leftChild->findAll(ITM_BASECOLUMN, leftLeafValues, TRUE, TRUE);
if (!leftLeafValues.isEmpty())
{
// First Question: is the predicate of the form
// "<col> <op> <expression>" or "<expression> <op> <col>"?
// Earlier logic places stand-alone columns on the left, so look
// for a histogram associated with the left-hand valueId.
//
CollIndex leftColIndex;
if (getColStatDescIndexForColumn(leftColIndex, leftChild->getValueId()) )
{
globalPredicate = FALSE; // not a 'global' predicate
ColStatDescSharedPtr statDesc = (*this)[leftColIndex];
if ( NOT ( statDesc->isPredicateApplied( origPredValueId ) ) )
{
// first time for this histogram
// If the predicate is a LIKE predicate with no wildcards
// in the pattern, and for some reason it could not be transformed
// into an equality predicate, set its selectivity equal to
// 1/UEC; else go the usual way.
if ( (tempOp == ITM_LIKE) &&
((Like *)tempPred)->isPatternAStringLiteral())
{
ColStatsSharedPtr colStat = statDesc->getColStats();
if (colStat->isFakeHistogram())
defaultSel = 1.0/(CURRSTMT_OPTDEFAULTS->defNoStatsUec()) ;
else
{
CostScalar tempUec = colStat->getTotalUec();
//To guard against div-by-zero assertion
if(tempUec == csZero)
tempUec = 1;
defaultSel = 1.0/tempUec.value();
defaultSel.maxCsOne();
}
if(isOpTypeNot)
defaultSel = 1 - defaultSel.value();
}
else
defaultSel = pred->defaultSel();
statDesc->addToAppliedPreds( origPredValueId );
statDesc->applySel( defaultSel );
} // NOT isPredicateApplied
else
{
alreadyApplied = TRUE;
}
} // column is leading prefix of histogram
}
} // if ( pred->getArity() > 0 )
else
{
defaultSel = pred->defaultSel();
}
}
}
// maxSelectivity computation is done
if (maxSelectivity) return;
//
// we do the following whether the predicate supports synthesis or not
//
if ( globalPredicate )
{
if(pred->isSelectivitySetUsingHint())
{
defaultSel = pred->getSelectivityFactor();
(*this)[0]->getColStats()->setSelectivitySetUsingHint();
}
else
// not a local predicate, yet still a default predicate
// ==> should mean: No statistics.
defaultSel = pred->defaultSel();
// *******************************************************************
// $$$ this code should go away sooner or later (when the normalizer /
// constant-folding do everything they should ...)
//
// until then, handle a few simple braindead cases here that should
// already have been taken care of
if ( pred->getArity() == 2 )
{
NABoolean negate = FALSE;
ConstValue * lhs = pred->child(0)->castToConstValue( negate );
ConstValue * rhs = pred->child(1)->castToConstValue( negate );
if ( lhs != NULL && rhs != NULL )
{
EncodedValue left ( lhs, negate );
EncodedValue right( rhs, negate );
if ( left == right )
{
OperatorTypeEnum op = pred->getOperatorType();
switch ( op )
{
case ITM_EQUAL: // 1 == 1
case ITM_LESS_EQ: // 1 <= 1
case ITM_GREATER_EQ: // 1 >= 1
return; // pred has no effect on selectivity!
case ITM_NOT_EQUAL: // 1 <> 1
case ITM_GREATER: // 1 > 1
case ITM_LESS: // 1 < 1
defaultSel = csZero ; // pred removes all rows!
break;
default:
break;
}
}
}
} // $$$ end of stuff that should someday be removed ...
// *******************************************************************
} // globalPredicate
// $$$ WE NO LONGER WANT TO NORMALIZE HISTOGRAMS AFTER EVERY
// $$$ PREDICATE!
//
// Instead, we will normalize all of the histograms' rowcounts after
// we have applied all of the predicates.
//
// However, in the case of a global predicate (read: one that does not
// apply to any particular histogram or histograms), we apply its
// selectivity to all ColStatDesc's.
if ( NOT alreadyApplied && globalPredicate ) // don't redo what's already been done.
{
if(isAFalsePred)
defaultSel = csZero;
// Don't apply the selectivity here! Instead, we return it to the
// calling function (via the out-parameter globalPredicateReduction) ;
// later on, we will go through the histograms and apply it to all
// of them, at the same time that we apply all of the predicate
// selectivities to all of them (end of estimateCardinality).
//
// NOTE: if we apply this selectivity here, to all histograms, the
// end result is that it appears that each histogram had a separate
// _x_ reduction applied to it, so the total result of the predicate
// will be _x_^n, where n is the number of histograms!
globalPredicateReduction = defaultSel;
}
else
{
globalPredicateReduction = csOne;
}
} //ColStatDescList::applyDefaultPred
// -----------------------------------------------------------------------
// ColStatDescList::getMaxFreq
// get maximum frequency for the given col
//
// -----------------------------------------------------------------------
CostScalar
ColStatDescList::getMaxFreq(ValueId col)
{
CollIndex index;
NABoolean found = getColStatDescIndexForColumn( index, col );
// histogram not found, return -1
if ( found == FALSE )
return -1.0;
ColStatsSharedPtr colStats = (*this)[index]->getColStats();
CostScalar freq = colStats->getMaxFreq();
if (freq <= csZero)
{
ColStatsSharedPtr colStatsModifiable = (*this)[index]->getColStatsToModify();
colStatsModifiable->computeMaxFreqOfCol(TRUE);
freq = colStatsModifiable->getMaxFreq();
}
return freq;
}
CostScalar ColStatDescList::getUEC(ValueId col)
{
CollIndex index;
NABoolean found = getColStatDescIndexForColumn( index, col );
// histogram not found, return 1
if ( found == FALSE )
return 1.0;
ColStatsSharedPtr colStats = (*this)[index]->getColStats();
CostScalar uec = colStats->getTotalUec();
if (uec < 1.0)
uec = 1.0;
return uec;
}
// -----------------------------------------------------------------------
// ColStatDescList::getMaxOfMaxFreqOfCol
// get the maximum of the max frequencies over the given column set
//
// -----------------------------------------------------------------------
CostScalar ColStatDescList::getMaxOfMaxFreqOfCol(const ValueIdSet & baseColSet)
{
CostScalar maxFreq = csZero;
CostScalar freq = csZero;
for (ValueId col = baseColSet.init();
baseColSet.next(col);
baseColSet.advance(col))
{
if (col == NULL_VALUE_ID)
continue;
ColStatsSharedPtr colStat = this->getColStatsPtrForColumn (col);
ColAnalysis * colAnalysis = col.colAnalysis();
// for higher way joins, if the base column does not appear as
// characteristic output, its histogram will not be cached. In that
// case we shall use the total frequency when that histogram last appeared
// in the list
if (colStat == NULL)
{
if (colAnalysis &&
(colAnalysis->getMaxFreq() != csMinusOne) )
{
freq = colAnalysis->getMaxFreq();
if (freq > maxFreq)
maxFreq = freq;
}
else
{
// if there is no way we can get the frequency for this
// column, give up (return zero), and don't bother to
// look at the other columns
maxFreq = csZero;
break;
}
}
else
{
if (colStat->isOrigFakeHist())
{
maxFreq = csZero;
break;
}
freq = this->getMaxFreq(col);
if (freq > maxFreq)
maxFreq = freq;
}
}
return maxFreq;
}
// -----------------------------------------------------------------------
// ColStatDescList::getMinOfMaxFreqOfCol
// get the minimum of the max frequencies over the given column set
//
// -----------------------------------------------------------------------
CostScalar ColStatDescList::getMinOfMaxFreqOfCol(const ValueIdSet & baseColSet)
{
CostScalar minFreq = COSTSCALAR_MAX;
CostScalar freq = csOne;
if (baseColSet.entries() == 0)
return csZero;
for (ValueId col = baseColSet.init();
baseColSet.next(col);
baseColSet.advance(col))
{
if (col == NULL_VALUE_ID)
continue;
ColStatsSharedPtr colStat = this->getColStatsPtrForColumn (col);
ColAnalysis * colAnalysis = col.colAnalysis();
// for higher way joins, if the base column does not appear as
// characteristic output, its histogram will not be cached. In that
// case we shall use the total frequency when that histogram last appeared
// in the list
if (colStat == NULL)
{
if (colAnalysis &&
(colAnalysis->getMaxFreq() > csZero) )
{
freq = colAnalysis->getMaxFreq();
if (freq < minFreq)
minFreq = freq;
}
else
{
// if there is no way we can get the frequency for this
// column, give up (return zero), and don't bother to
// look at the other columns
minFreq = csZero;
break;
}
}
else
{
if (colStat->isOrigFakeHist())
{
minFreq = csZero;
break;
}
freq = this->getMaxFreq(col);
if (freq < minFreq)
minFreq = freq;
}
}
return minFreq;
}
// Max frequencies are computed slightly differently for CASE expressions:
// if one of the leaves happens to be a constant, then we
// assume the frequency to be one. A leaf value is a result of the if-then-else.
// Example: if <condition> then <leaf1> else <leaf2>
CostScalar
ColStatDescList::getMaxFreqForCaseExpr(const ValueIdSet & leafValues)
{
if(leafValues.entries() == 0)
{
CCMPASSERT ( leafValues.entries() > 0 );
// In the absence of leaf values, it is not possible to calculate the max freq.
// Return zero to indicate uniform distribution.
return csZero;
}
CostScalar maxFreq = csZero;
CostScalar freq = csZero;
for (ValueId id = leafValues.init(); leafValues.next(id); leafValues.advance(id))
{
if (id.getItemExpr()->doesExprEvaluateToConstant(FALSE))
{
maxFreq = csOne;
break;
}
else
{
freq = getMaxOfMaxFreqOfCol(id);
if (freq > maxFreq)
maxFreq = freq;
}
}
// Cap the number of leaves at 5, to avoid getting very low frequencies.
CostScalar maxFreqOfCaseExpr = csOne;
double numOfLeaves = (double)(MINOF(leafValues.entries(), 5));
// To avoid div-by-zero exception
numOfLeaves = MIN_ONE(numOfLeaves);
maxFreqOfCaseExpr = maxFreq * (1/numOfLeaves);
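// e.g. (illustrative values): a CASE expression with 3 column leaves
// whose largest max-frequency is 600 yields 600 * (1/3) = 200.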
return maxFreqOfCaseExpr;
}
// -----------------------------------------------------------------------
// ColStatDescList::getMinUec
//
// Returns the minimum UEC from the given column set
// -----------------------------------------------------------------------
CostScalar ColStatDescList::getMinUec(const ValueIdSet & baseColSet) const
{
CostScalar uec = csMinusOne;
CostScalar minUec = COSTSCALAR_MAX;
// This is an error condition; the caller will later assume a uniform distribution
if (baseColSet.entries() == 0)
return csMinusOne;
for (ValueId col = baseColSet.init();
baseColSet.next(col);
baseColSet.advance(col))
{
ColStatsSharedPtr colStat = this->getColStatsPtrForColumn (col);
// for higher way joins, if the base column does not appear as
// characteristic output, its histogram will not be cached. In that
// case we shall use the total UEC when that histogram last appeared
// in the list
if (colStat == NULL)
{
ColAnalysis * colAnalysis = col.colAnalysis();
if (colAnalysis &&
(colAnalysis->getFinalUec() != csZero) )
{
uec = colAnalysis->getFinalUec();
if (uec < minUec)
minUec = uec;
}
else
{
// if there is no way we can get the UEC for the partitioning
// column, return -1 so the caller falls back to the row count,
// and don't bother to look at the other columns
minUec = csMinusOne;
break;
}
}
else
{
uec = colStat->getTotalUec();
if (uec < minUec)
{
minUec = uec;
}
}
}
return minUec;
}
// -----------------------------------------------------------------------
// ColStatDescList::getMaxUec
//
// Returns the maximum UEC from the given column set. If histogram does
// not exist for any column, then return -1, as error condition.
// -----------------------------------------------------------------------
CostScalar ColStatDescList::getMaxUec(const ValueIdSet & baseColSet) const
{
CostScalar uec = csMinusOne;
CostScalar maxUec = csMinusOne;
// This is an error condition; the caller will later assume a uniform distribution
if (baseColSet.entries() == 0)
return csMinusOne;
for (ValueId col = baseColSet.init();
baseColSet.next(col);
baseColSet.advance(col))
{
ColStatsSharedPtr colStat = this->getColStatsPtrForColumn (col);
// for higher way joins, if the base column does not appear as
// characteristic output, its histogram will not be cached. In that
// case we shall use the total UEC when that histogram last appeared
// in the list
if (colStat == NULL)
{
ColAnalysis * colAnalysis = col.colAnalysis();
if (colAnalysis &&
(colAnalysis->getFinalUec() > csZero) )
{
uec = colAnalysis->getFinalUec();
if (uec > maxUec)
maxUec = uec;
}
else
{
// if there is no way we can get the UEC for the partitioning
// column, return -1 so the caller falls back to the row count,
// and don't bother to look at the other columns. This means that
// if the partitioning function contains a constant, we will end
// up with maxUec = rowcount
maxUec = csMinusOne;
break;
}
}
else
{
if (colStat->isOrigFakeHist())
{
maxUec = csMinusOne;
break;
}
uec = colStat->getTotalUec();
if (uec > maxUec)
{
maxUec = uec;
}
}
}
return maxUec;
}
// -----------------------------------------------------------------------
// ColStatDescList::getMaxUecForCaseExpr
//
// Compute Max UEC based on the number of leaf expressions. Max
// UEC will be equal to max of ((number of leaf expressions)
// and (the max UEC of any columns in the leaf expressions)).
// -----------------------------------------------------------------------
CostScalar ColStatDescList::getMaxUecForCaseExpr(const ValueIdSet & leafValueSet) const
{
CostScalar maxUec = getMaxUec(leafValueSet);
// the max UEC is at least the number of leaf expressions in the CASE
maxUec = MAXOF(maxUec.getValue(), leafValueSet.entries());
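// e.g. (illustrative values): 4 leaf expressions with a max column
// UEC of 2 yield maxUec = MAXOF(2, 4) = 4.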
return maxUec;
}
// -----------------------------------------------------------------------
// ColStatDescList::getAggregateUec
//
// Hopefully a useful method for various users of histograms, both
// internal and external.
// -----------------------------------------------------------------------
CostScalar ColStatDescList::getAggregateUec (const ValueIdSet & columns) const
{
CostScalar retval = csMinusOne;
// We first see if there's any multi-column information that exactly
// matches the column-list in question.
if ( getUecList() != NULL )
{
// The lookup method returns -1 if there is no multi-column uec
// information for the ValueIdSet parameter; otherwise, it returns
// those columns' multi-column uec.
retval = getUecList()->lookup( columns );
if ( retval.isGreaterThanZero() /*> csZero*/ )
{
// We know how many aggregate uec these columns had initially at
// the scan nodes, before any predicates were applied. Now we
// need to take those predicates into account.
CollIndex index;
CostScalar origUec, currUec;
for ( ValueId column = columns.init();
columns.next( column );
columns.advance( column ) )
{
origUec = getUecList()->lookup( column ); // get original single-column uec
NABoolean found = getColStatDescIndexForColumn( index, column );
// either orig or current single-column uec not found
if ( found == FALSE || origUec.isLessThanZero() /* i.e., not found */ )
continue; // we continue looping
currUec = (*this)[index]->getColStats()->getTotalUec();
if ( currUec > origUec ) // sanity check: should not increase the UEC!
continue;
retval *= currUec / origUec; // apply the reduction
}
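// Worked example (illustrative values): an MC uec of 1,000 for the
// column set, with one column's uec reduced from 100 to 20 by local
// predicates, gives retval = 1,000 * (20/100) = 200.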
return retval;
}
}
// If we reach this point in the code, then we weren't able to use any
// "true" multi-column information to answer the question. So we simply
// multiply the single-columns together to get a fudged aggregate
// multi-column uec number.
retval = csOne;
CollIndex index;
for ( ValueId column = columns.init();
columns.next( column );
columns.advance( column ) )
{
NABoolean found = getColStatDescIndexForColumn( index, column );
// if any of the columns can't be found, abort
if ( found == FALSE ) return csMinusOne;
// multiply the totaluec of the histogram that matches each column
retval *= (*this)[index]->getColStats()->getTotalUec();
}
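// e.g. (illustrative values): two columns with current uecs 10 and 40
// give a fudged aggregate uec of 10 * 40 = 400.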
return retval;
}
// ----------------------------------------------------------------------
// getColStatDescIndexForColWithMaxUec(leftColIndex, leftLeafValues)
// From the given ValueIdSet, the method returns the index of the histogram
// with max UEC
// ----------------------------------------------------------------------
NABoolean ColStatDescList::getColStatDescIndexForColWithMaxUec(
CollIndex & indexWithMaxUec, /* out */
const ValueIdSet & inputColumns /* in */
) const
{
if (inputColumns.entries() == 1)
{
ValueId vid;
inputColumns.getFirst(vid);
return getColStatDescIndexForColumn(indexWithMaxUec, vid);
}
// This flag is used to indicate that a histogram exists for at least
// one of the columns on which the predicate is being applied.
// It does not differentiate between real and default
// histograms.
NABoolean statsExist = FALSE;
// This flag is set to TRUE if even one of the histograms for the
// columns on which the predicate is being applied has real stats.
NABoolean atleastOneRealHist = FALSE;
// This CostScalar will contain the maximum UEC amongst all fake histograms
// if the atleastOneRealHist flag is FALSE, or amongst all real histograms
// if it is TRUE.
CostScalar maxUec = csOne;
for (ValueId id = inputColumns.init();
inputColumns.next(id);
inputColumns.advance(id))
{
CollIndex index;
if (!(getColStatDescIndexForColumn(index, id) ) )
continue;
statsExist = TRUE;
ColStatsSharedPtr colStats = (*this)[index]->getColStats();
// UECs of fake histograms are not real.
// We have already found at least one histogram for which actual stats
// exist, hence skip the fake histograms for the UEC comparison.
if (colStats->isOrigFakeHist() && atleastOneRealHist)
continue;
// Do real stats exist for this histogram? If this is the first
// real histogram being encountered, then flush out whatever maxUec
// has been computed so far.
if (!atleastOneRealHist && !colStats->isOrigFakeHist())
{
maxUec = csOne;
atleastOneRealHist = TRUE;
}
// From this point onwards, we are comparing
// either all default UECs or all real UECs.
CostScalar uec = colStats->getTotalUec();
if (uec >= maxUec)
{
maxUec = uec;
indexWithMaxUec = index;
}
}
// if stats exist for even one column, return TRUE;
return statsExist;
}
// ------------------------------------------------------------------
// addToAppliedPredsOfAllCSDs
// Update appliedPred attribute for all histograms whose column
// information is passed in the ValueIdSet
// -------------------------------------------------------------------
void
ColStatDescList::addToAppliedPredsOfAllCSDs(const ValueIdSet & inputColumns,
const ValueId & newPredicate)
{
for (ValueId id = inputColumns.init();
inputColumns.next(id);
inputColumns.advance(id))
{
CollIndex index;
if (!(getColStatDescIndexForColumn(index, id) ) )
continue;
ColStatDescSharedPtr colStatDesc = (*this)[index];
colStatDesc->addToAppliedPreds(newPredicate);
// also set the shape changed flag to TRUE
colStatDesc->getColStats()->setShapeChanged(TRUE);
}
}
// -----------------------------------------------------------------------
// ColStatDescList::useMultiUecIfCorrelatedPreds
//
// A CSDL::estimateCardinality() subroutine
//
// Use multi-column uec to find the resulting rowcount if we are
// applying multiple predicates to the same table on columns which
// are highly correlated (i.e., functional dependencies, ...).
//
// helper fn : ValueIdHashFn(), used to create an association list
// of <ValueId, CostScalar> pairs, which proves very useful.
// -----------------------------------------------------------------------
void
ColStatDescList::useMultiUecIfCorrelatedPreds (
CostScalar & newRowcount, // in/out
const CostScalar & oldRowcount, // in
CollIndex predCount, // in : quick check : proceed if >=2
const CollIndexList &joinHistograms, // in : histograms used in MC Join
CollIndex startIndex, // in : 1st idx of CSDL to look at
CollIndex stopIndex, // in : idx of CSDL+1 to look at
NAHashDictionary<ValueId, CostScalar> & biLogicPredReductions)
{
if ( getUecList() == NULL )
return ;
if ( startIndex >= stopIndex ) // misuse of function, oh well
return ;
if ( predCount < 2 ) // fewer than 2 histograms changed, nothing to do
return ;
NABoolean largeTableNeedsStats = FALSE;
CostScalar adjRCBeforePreds = floor(oldRowcount.getValue());
CollIndexList predList(STMTHEAP); // the CSDL-indices of the predicate-applied histograms
CollIndex i;
for ( i = startIndex; i < stopIndex; i++ )
{
ColStatsSharedPtr thisColStats = (*this)[i]->getColStats();
// Use ceil for the row count after predicates on single-column histograms,
// and floor of the original rowcount,
// to take care of CostScalar rounding issues before doing the comparison
// to see if MC stats should be used to uplift the cardinalities.
// Use the MC adjustment only if the adjusted rowcount from the histogram after
// applying predicates is less than the adjusted row count before applying predicates.
CostScalar adjHistRC = ceil(thisColStats->getRowcount().getValue());
if ( NOT joinHistograms.contains( i )
AND (adjHistRC < adjRCBeforePreds))
{
// Skip any histograms created for constants.
if ((NOT largeTableNeedsStats) AND
(thisColStats->isUpStatsNeeded() ) AND
!(thisColStats->isVirtualColForHist() ) )
largeTableNeedsStats = TRUE;
predList.insert( i ); // store the index, not the histogram
}
}
// should have already checked for this, but just to be sure ...
if ( (predList.entries() + biLogicPredReductions.entries()) < 2 )
return ;
NAHashDictionary<ValueId, CostScalar> predReductions // <ValueId, rowred> pairs
(&(ValueIdHashFn), 11, TRUE, HISTHEAP) ;
CostScalar highestReductionFromPreds = csOne;
for ( i = 0; i < predList.entries(); i++ )
{
ColStatDescSharedPtr tmpCSD = (*this)[ predList[i] ] ;
CostScalar scCard = tmpCSD->getColStats()->getRowcount();
const ValueIdSet & preds = tmpCSD->getMergeState();
// There could be cases where a null-instantiated column has participated
// in the join. This can happen for left joins or Unions. In those
// cases, the mergeState is updated with the nulled expression (don't know why?).
// But this should not have an impact on adjusting cardinalities for Scans.
// So, in case of a null-instantiated column, retrieve its child and use
// that to compute the multi-column uec adjustment. Sol: 10-060609-7077 and 10-060607-7010
ValueIdSet baseColSet;
preds.findAllReferencedBaseCols(baseColSet);
const CostScalar rowRed = scCard / oldRowcount;
if(rowRed < highestReductionFromPreds)
highestReductionFromPreds = rowRed;
for ( ValueId id = baseColSet.init(); baseColSet.next( id ); baseColSet.advance( id ) )
{
ValueId * key = new (HISTHEAP) ValueId( id );
CostScalar * value = new (HISTHEAP) CostScalar( rowRed );
ValueId * result = predReductions.insert( key, value );
// all inserts should be successful; should not have the same
// ValueId in multiple histograms
//
// --> NOT TRUE! Outer joins can produce CSDL's which have 2
// histograms for a given column; however, in this case, it's
// generally (always?) the case that the 2nd instance of a
// histogram is the null-instantiated one, so we should be able
// to ignore it without any bad effects.
//
}
}
CostScalar highestReductionFromEqPreds = highestReductionFromPreds;
// Append the columns and the respective reductions from the bilogic
// predicates to the overall list of columns and reductions.
NAHashDictionaryIterator<ValueId, CostScalar> biLogicPredIter( biLogicPredReductions );
ValueId * biLogicPredColumn = NULL;
CostScalar * reduction = NULL;
CostScalar reductionFromBiLogicPreds = csOne;
for ( biLogicPredIter.getNext( biLogicPredColumn, reduction );
biLogicPredColumn != NULL && reduction != NULL;
biLogicPredIter.getNext( biLogicPredColumn, reduction ) )
{
reductionFromBiLogicPreds *= *(reduction);
// If the column already exists, multiply the bilogic reduction
// to the overall reduction of the column
if(predReductions.contains(biLogicPredColumn))
{
*(reduction) *= *(predReductions.getFirstValue(biLogicPredColumn));
predReductions.remove(biLogicPredColumn);
}
if(*(reduction) < highestReductionFromPreds)
highestReductionFromPreds = *(reduction);
ValueId * key = new (HISTHEAP) ValueId( *biLogicPredColumn );
CostScalar * value = new (HISTHEAP) CostScalar( *reduction );
ValueId * result = predReductions.insert( key, value );
}
// The following is the row count of the histogram encapsulating the most
// reducing predicate. This serves as the upper bound for the uplifted rowcount.
CostScalar minSingleColPredRC = csOne;
// updatedOldRowcount stores row count before any of the predicates
// being considered for correlation were applied.
CostScalar updatedOldRowcount = oldRowcount;
if(reductionFromBiLogicPreds != csZero)
{
updatedOldRowcount /= reductionFromBiLogicPreds;
minSingleColPredRC = highestReductionFromPreds * updatedOldRowcount;
}
else
minSingleColPredRC = highestReductionFromEqPreds * oldRowcount;
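// Illustration with hypothetical numbers: if oldRowcount = 1000,
// reductionFromBiLogicPreds = 0.5 and highestReductionFromPreds = 0.1,
// then updatedOldRowcount = 1000 / 0.5 = 2000 and
// minSingleColPredRC = 0.1 * 2000 = 200, i.e. the uplifted rowcount
// computed below may never exceed 200 rows.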
// Now we've got an association list of
// <columns we've applied predicates to, associated rowcount reduction>
// pairs.
//
// The question now becomes : do we have sufficient multi-column uec
// information to determine an adjustment to the rowcount (due to multiple
// predicates on highly-correlated columns)? If so, then the value
// of 'reductionAdjustment' should be applied to newRowcount; otherwise,
// we're done.
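// For example (hypothetical columns): predicates on CITY and STATE may
// each reduce the rowcount 10x. Under the independence assumption the
// combined reduction is 100x, but if the multi-column uec shows the two
// columns are highly correlated (mcUec(CITY,STATE) close to uec(CITY)),
// the true combined reduction is closer to 10x, and newRowcount is
// adjusted upward accordingly.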
// When the newRowcount is zero, we give up on the uplift altogether
if(newRowcount.isExactlyZero())
return;
CostScalar reductionAdjustment;
NABoolean sufficientInformation =
uecList()->useMCUecForCorrPreds (predReductions, /* in/mod */
predCount, /* in : #param */
updatedOldRowcount, /* in */
newRowcount, /* in */
largeTableNeedsStats,
*this, /* in. Pass list of histograms to be used for displayMissingStatsWarnings */
reductionAdjustment); /* out */
// if we don't have sufficient MC-info, or there aren't any
// "highly-correlated" columns, then never mind
if(NOT sufficientInformation)
return;
CostScalar result = newRowcount * reductionAdjustment;
// sanity check : if our calculated result actually *reduced* the
// resulting rowcount (we are trying to increase it!), or if our
// calculated result is more than the rowcount before we applied these
// predicates (applying predicates should not increase rowcount!), then
// we screwed up somewhere : ergo, ignore these results and return.
if ( result < newRowcount)
return;
// Upper bound sanity check: the cardinality cannot be higher than the
// minimum rowcount produced by any single-column predicate.
result = MINOF(result, minSingleColPredRC);
newRowcount = result; // #retval
}
// -----------------------------------------------------------------------
// ColStatDescList::addRecentlyJoinedCols
// Traverse the histogram list and collect the recently joined columns
// -----------------------------------------------------------------------
void
ColStatDescList::addRecentlyJoinedCols(CollIndex startIndex,
CollIndex stopIndex)
{
for (CollIndex i = startIndex ; i < stopIndex ; i++ )
{
ColStatsSharedPtr thisColStats = (*this)[i]->getColStats();
// skip any histograms created for constants
if ( !(thisColStats->isVirtualColForHist() ) &&
thisColStats->isRecentJoin() )
{
// the first join sets the joined cols in joinStatDescList, which will be
// used later to set the min cardinality for the join.
// For left joins and Unions, the columns in the merged state could be
// hidden by another expression, hence extract the base columns from it.
// Statistics are not affected by the extra expression.
ValueIdSet mergedState = (*this)[i]->getMergeState();
ValueIdSet baseColSet;
mergedState.findAllReferencedBaseCols(baseColSet);
this->addToJoinedCols(baseColSet);
}
}
}
// -----------------------------------------------------------------------
// ColStatDescList::useMultiUecIfMultipleJoins
//
// A CSDL::estimateCardinality() subroutine
//
// Use multi-column uec to find the resulting rowcount from a
// multi-column join between two tables, if possible.
//
// We need the "oldRowcount" parameter in order to determine the
// row reduction for each join Histogram. The rowRedFactor_ data
// member is not set until we do synchronizeStats() later on.
//
// E.g., for "join T1 and T2 on T1.a=T2.b and T1.c=T2.d", if we have
// multi-column uec information on (T1.a,T1.c) and on (T2.b,T2.d), then we
// can improve our rowcount estimate for this join.
// -----------------------------------------------------------------------
void
ColStatDescList::useMultiUecIfMultipleJoins (
CostScalar & newRowcount, /* in/out */
const CostScalar & oldRowcount, /* in */
CollIndex startIndex, /* in : first index of CSDL */
CollIndex stopIndex, /* in : last index of CSDL+1 */
CollIndexList & joinHistograms, /* out */
const Join * expr,
MergeType mergeMethod)
{
// compute the reduction from single column histograms up to this point
CostScalar redFromSC = newRowcount / oldRowcount;
// We start by saying that there are no joins represented by this
// ColStatDescList, the typical case being a cross product. As soon as we
// see a joining column, we set this to TRUE. If we see more than one
// joining column, we set it back to FALSE.
joinOnSingleCol_ = FALSE;
// this should probably never happen ... but if it does: when we don't
// have any multi-column information whatsoever,
// don't cause a memory exception!
if ( getUecList() == NULL )
return;
if ( startIndex >= stopIndex ) // misuse of function, oh well
return;
// first, we need to determine if more than one join was performed -- and
// reset all of the "isRecentJoin" flags
CollIndex i, j;
for ( i = startIndex ; i < stopIndex ; i++ )
{
ColStatsSharedPtr thisColStats = (*this)[i]->getColStats();
// skip any histograms created for constants
if ( !(thisColStats->isVirtualColForHist() ) &&
thisColStats->isRecentJoin() )
{
joinOnSingleCol_ = TRUE;
(*this)[i]->getColStatsToModify()->setRecentJoin( FALSE ); // unset flag
joinHistograms.insert( i ); // store the index, not the histogram
// the first join sets the joined cols in joinStatDescList, which will be
// used later to set the min cardinality for the join.
// For left joins and Unions, the columns in the merged state could be
// hidden by another expression, hence extract the base columns from it.
// Statistics are not affected by the extra expression.
ValueIdSet mergedState = (*this)[i]->getMergeState();
ValueIdSet baseColSet;
mergedState.findAllReferencedBaseCols(baseColSet);
this->addToJoinedCols(baseColSet);
}
}
// if fewer than 2 joins were performed, never mind
if ( joinHistograms.entries() < 2 )
return;
// Join is on more than one column, so set the flag to FALSE
joinOnSingleCol_ = FALSE;
LIST(ValueIdList) joinValueIdPairs(STMTHEAP); // the ValueId's pairwise (per join)
// look inside the mergeStates of each join histogram, and grab the
// ValueIds associated with each join
for ( i = 0 ; i < joinHistograms.entries() ; i++ )
{
ValueIdList tmp =
(*this)[ joinHistograms[i] ]->getMergeState(); // set->list
ValueIdList * tmp2 = new STMTHEAP ValueIdList();
CollIndex indx = 0;
// There are cases where a null-instantiated column has participated
// in the join, e.g. for left joins or Unions. In those cases, the
// mergeState is updated with the nulled expression (reason unknown).
// This should not affect the cardinality adjustment for joins, so for
// a null-instantiated column we retrieve its child and use that to
// compute the multi-col uec adjustment. Sol: 10-060609-7077 and 10-060607-7010
for (CollIndex k = 0; k < tmp.entries(); k++)
{
if (tmp[k].getItemExpr()->getOperatorType() != ITM_BASECOLUMN)
{
ValueIdSet result;
tmp[k].getItemExpr()->findAll(ITM_BASECOLUMN, result, TRUE, TRUE);
tmp2->insertSet(result);
}
else
tmp2->insert(tmp[k]);
}
// although we have not considered the MC-UEC for the columns which have
// been reduced by non-equality predicates, we still need to take into
// account their selectivity from joins on single columns, assuming that
// these are not correlated to other columns
joinValueIdPairs.insert( *tmp2 );
}
// now see how many of these match our multicolumn-uec information
// the ValueId's for the columns of the two tables involved in the
// multi-column join
CostScalar prodInitUec = csMinusOne, // #docvars
multiColUec = csMinusOne, // #retvals
leftMCUec = csOne;
LIST(ValueIdList) joinValueIdPairsRemaining = joinValueIdPairs; // #retval
const NABoolean largeTableNeedsStats = // should stats exist for this table?
(*this)[0]->getColStats()->isUpStatsNeeded();
NABoolean checkForLowBound = FALSE;
// baseRCForMCUEC contains the base rowcount of the table which
// has the maximum multi-column UEC for the joining columns
CostScalar baseRCForMCUEC = csOne;
NABoolean joinOnUnique = FALSE;
// If the join is between unique/non-unique sides, then the output parameter
// 'multiColUec' will return the row count of the non-unique side.
NABoolean sufficientInformation =
uecList()->getUecForMCJoin (joinValueIdPairsRemaining, /* in/out */
largeTableNeedsStats, /* in */
expr,
prodInitUec, /* out */
multiColUec, /* out */
baseRCForMCUEC,
leftMCUec,
checkForLowBound, /* out */
joinOnUnique,
*this,
redFromSC);
// minimum cardinality from join should be the minimum cardinality of group
// for empty input logical properties.
if (checkForLowBound)
setCapForLowBound();
if ( NOT sufficientInformation )
return;
// error! value not set, or simply avoiding div-by-zero! ignore MC-info in this case
if ( multiColUec.isLessThanZero() )
return;
// Since we've reached this point, we know that the required
// multi-column uec information exists to improve on the single-column
// selectivity rowcount estimate!
// -------------------------------------------------------------------------
// What to do if joinValueIdPairsRemaining still has entries in it?
//
// If there are any joins that haven't been accounted for, that should
// be alright (I think?), since their reductions have already been
// applied to produce the single-column selectivity rowcount estimate.
//
// However, we don't want those entries in joinValueIdPairsRemaining to
// go through the loop below. So, remove these joins from
// joinHistograms.
// -------------------------------------------------------------------------
for ( i = 0; i < joinValueIdPairsRemaining.entries(); i++ )
{
const ValueIdSet ithPair =
joinValueIdPairsRemaining[i]; // convenience: list->set
for ( j = 0; j < joinHistograms.entries(); j++ )
{
const ValueIdSet & mergeStateSet =
(*this)[ joinHistograms[j] ]->getMergeState();
// for left joins and Unions, the columns in the merged state could be
// hidden by another expression, hence extract the base columns from it;
// statistics are not affected by the extra expression
ValueIdSet baseColSet;
mergeStateSet.findAllReferencedBaseCols(baseColSet);
// the code in getUecForMCJoin sometimes removes a table
// reference from an entry in joinValueIdPairsRemaining --> so
// we can't use simple equality for the comparison below
if ( baseColSet.contains( ithPair ) )
{
joinHistograms.removeAt( j );
break ;
}
}
}
// the following lines contain variables defined/used in the MC-document
const CostScalar SC_cardinality = newRowcount; // #docvar ("single-col value")
CostScalar MC_cardinality = csOne; // #docvar ("multi-col value")
CostScalar minSingleColJoinRC = COSTSCALAR_MAX;
// first, find the smallest rowcount among the single-column join histograms
for ( i = 0; i < joinHistograms.entries(); i++ )
{
CostScalar singleColJoinRC;
singleColJoinRC = (*this)[ joinHistograms[i] ]->getColStats()->getRowcount();
if (singleColJoinRC < minSingleColJoinRC)
minSingleColJoinRC = singleColJoinRC;
}
NABoolean aSemiJoin = (expr && (expr->isSemiJoin() || expr->isAntiSemiJoin())) ? TRUE : FALSE;
if (!expr && mergeMethod == SEMI_JOIN_MERGE)
aSemiJoin = TRUE;
if ( joinOnUnique && !aSemiJoin)
{
CostScalar leftRC = ((Join *)expr)->child(0).outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP))->getResultCardinality();
CostScalar rightRC = ((Join *)expr)->child(1).outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP))->getResultCardinality();
CostScalar baseRowcount = leftRC * rightRC;
// If the joining columns are unique, the rowcount from equality
// predicates is equal to that of the non-unique side
//
// multiColUec is a misnomer here as it will be storing row count
// of non-unique side instead of MC UEC.
MC_cardinality = multiColUec * oldRowcount / baseRowcount;
}
else
{
// for non-unique joining column set, compute reduction as follows
if (CmpCommon::getDefault(COMP_BOOL_145) == DF_OFF)
MC_cardinality = SC_cardinality * prodInitUec/multiColUec;
else
{
CostScalar sel = MAXOF((SC_cardinality / oldRowcount), COSTSCALAR_EPSILON);
if (baseRCForMCUEC < multiColUec)
multiColUec = baseRCForMCUEC;
if (!aSemiJoin)
{
CostScalar selAdj = sel * prodInitUec / multiColUec;
CostScalar adj = (csOne - sel * prodInitUec) / baseRCForMCUEC;
selAdj = selAdj + adj;
MC_cardinality = selAdj * oldRowcount;
}
else
{
MC_cardinality = oldRowcount * multiColUec / leftMCUec;
}
}
}
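// Illustration of the COMP_BOOL_145 OFF branch, with hypothetical
// numbers: SC_cardinality = 10,000, prodInitUec = 100 * 50 = 5,000 and
// multiColUec = 800 give MC_cardinality = 10,000 * 5,000 / 800 = 62,500;
// the correlation between the join columns (800 actual vs. 5,000
// potential combinations) uplifts the single-column estimate, subject
// to the bounds applied below.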
// Lower bound sanity check for MC UEC. The cardinality should not go below
// MINOF(join estimate from single column histograms, oldRowcount / max multi-col UEC).
// The second factor accounts for anti-correlation, where the join estimate
// from single column histograms might be overestimated.
// For semi and anti-semi joins, we set the lower bound to SC_cardinality
// alone, because oldRowcount equals the left rowcount for semi-joins,
// which, if the outer is unique, would otherwise give lowBoundFromMCUec = 1.
CostScalar lowBoundFromMCUec = SC_cardinality;
if (!aSemiJoin)
lowBoundFromMCUec = MINOF(lowBoundFromMCUec, oldRowcount/multiColUec);
newRowcount = MAXOF(lowBoundFromMCUec, MC_cardinality);
// Upper bound sanity check: the cardinality cannot be higher than the
// minimum rowcount produced by any single-column join.
newRowcount = MINOF(newRowcount, minSingleColJoinRC);
}
// ----------------------------------------------------------------------
// this method sets inputCard on all the colStats in this list.
// The input cardinality is what comes from the outer. It is used to
// compute UECs in CalculateCorrectResultUec.
// -----------------------------------------------------------------------
void ColStatDescList::setInputCard(CostScalar rows)
{
for ( CollIndex i = 0; i < entries(); i++ )
(*this)[i]->setInputCard(rows);
}
// ----------------------------------------------------------------------
// ColStatDescList::synchronizeStats
// A CSDL utility routine used to call ColStatDesc::synchronizeStats()
// ----------------------------------------------------------------------
void
ColStatDescList::synchronizeStats (const CostScalar & baseRowcount,
const CostScalar & newRowcount,
CollIndex loopLimit)
{
for ( CollIndex i = 0; i < loopLimit; i++ )
{
const CostScalar & oldCount = (*this)[i]->getColStats()->getRowcount();
if ( oldCount != newRowcount )
(*this)[i]->synchronizeStats( baseRowcount, newRowcount );
}
}
void
ColStatDescList::synchronizeStats (const CostScalar & newRowcount,
CollIndex loopLimit)
{
for ( CollIndex i = 0; i < loopLimit; i++ )
{
const CostScalar & oldCount = (*this)[i]->getColStats()->getRowcount();
(*this)[i]->synchronizeStats( oldCount, newRowcount );
}
} // ColStatDescList::synchronizeStats
// ---------------------------------------------------------------------
// ColStatDescList::mergeListPairwise
//
// A routine used solely (today) for performing the implicit inner-equi-
// join between columns appearing as outer references in both children of
// the current join.
//
// It presumes a certain structure to the given THIS ColStatDescList, and
// also presumes what type of join is to be done. (e.g., we assume that
// there is an even number of ColStatDescSharedPtr's in the list.)
// ---------------------------------------------------------------------
CostScalar
ColStatDescList::mergeListPairwise ()
{
CollIndex i = 0;
CostScalar newRowcount = csZero;
CostScalar newUec = csZero;
ColStatDescSharedPtr rootStatDesc = (*this)[i];
ColStatsSharedPtr rootColStats = rootStatDesc->getColStatsToModify();
if ( rootColStats->getRowcount().isZero() )
{
// can't do much here....
while ( i < this->entries() )
{
removeAt( i+1 );
i++;
}
return csZero;
}
// sanity check
if ((entries() % 2) != 0)
{
CCMPASSERT( entries() % 2 == 0 ); // should be an even number!
//if not, return without merging. We don't want to end up with an
// unreferenced object in the collections class
return newRowcount;
}
// NB: we avoid having the resulting rowcount blow up by making sure
// we don't divide by something that's less than 1
const CostScalar saveRowcount = rootColStats->getRowcount();
CostScalar totalReduct = csOne;
CostScalar maxReduct = csOne;
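// Illustration with hypothetical numbers: for saveRowcount = 1000, if
// the first pairwise merge yields 100 rows and the second yields 50,
// the per-merge reductions are 0.1 and 0.05, so totalReduct = 0.005
// and the final newRowcount = 1000 * 0.005 = 5.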
while ( i < this->entries() )
{
rootStatDesc = (*this)[i];
rootColStats = rootStatDesc->getColStatsToModify();
if( NOT rootColStats->isShapeChanged() &&
NOT (*this)[i+1]->getColStats()->isShapeChanged() )
{
rootStatDesc->mergeColStatDesc(
(*this)[i+1],
AND_MERGE,
TRUE // force merge
);
// get the aggregate results following the latest merge
newRowcount = rootColStats->getRowcount();
newUec = rootColStats->getTotalUec();
CostScalar reduct = ( saveRowcount < csOne ?
csOne : newRowcount / saveRowcount );
totalReduct *= reduct;
// remove the i+1'th entry.
removeAt( i+1 );
}
else
if ( rootColStats->isShapeChanged() )
removeAt( i+1 );
else
removeAt( i );
i++; // skip over just merged ColStats to next pair
}
newRowcount = saveRowcount * totalReduct;
// Ensure that all histograms report the same new rowcount.
synchronizeStats( newRowcount, entries() );
return newRowcount;
} // ColStatDescList::mergeListPairwise
// ----------------------------------------------------------------------
// ColStatDescList::divideHistogramAtPartitionBoundaries
// An external routine used to take a CDSL and identify, via the rows for
// one of the range-partitioning table columns, which partitions are active
// (i.e., > 0 rows returned) in a query.
// ----------------------------------------------------------------------
NABoolean ColStatDescList::divideHistogramAtPartitionBoundaries
(const ValueIdList & listOfPartKeys, /*in*/
const ValueIdList & listOfPartKeyOrders, /*in*/
const LIST(EncodedValueList *) & listOfPartBounds, /*in*/
ValueId & keyCorrespondingToOutputRows, /*out*/
NABoolean & isKeyAscending, /*out*/
ColStats & outputRows, /*out*/
CollIndexList & outputFactors) const /*out*/
{
// $$$ first effort, we assume that the first partitioning key is the
// $$$ one we want
//
// $$$ this next stmt should be replaced by code that checks
// $$$ that the first column doesn't have only one uec!!!
CollIndex partKeyIndex = 0; // $$$ change this later !!!
// let the caller know the ValueId of the partitioning key we'll be using
keyCorrespondingToOutputRows = listOfPartKeys[partKeyIndex];
const ValueId & keyOrder = listOfPartKeyOrders[partKeyIndex];
const ItemExpr * ie = keyOrder.getItemExpr();
OperatorTypeEnum ote = ie->getOperatorType();
if ( ote == ITM_INVERSE )
isKeyAscending = FALSE; // #retval
else
isKeyAscending = TRUE;
// now build the Histogram corresponding to this part. key; we will use
// this to create the outputRows array that we return from this function
HistogramSharedPtr targetHist(new Histogram(HISTHEAP));
CollIndex i, numBounds = listOfPartBounds.entries();
if ( isKeyAscending ) // insert the boundary values in their current order
{
for ( i = 0; i < numBounds; i++ )
targetHist->insertAt(
i,
HistInt( listOfPartBounds[i]->at(partKeyIndex) )
);
}
else // insert the intervals in the reverse of their current order
{
for ( i = 0; i < numBounds; i++ )
targetHist->insertAt(
i,
HistInt(listOfPartBounds[numBounds-i-1]->at(partKeyIndex))
);
}
// make sure the "MAX-valued" HistInt doesn't contain "NULL" -- recall
// (see EncodedValue.[cpp h]) that NULL is encoded as MAX_DBL, which is
// what the last HistInt in targetHist will have as a value if the
// partitioning key is over a key of type double.
if ( (*targetHist)[numBounds-1].getBoundary().isNullValue() )
{
(*targetHist)[numBounds-1].setBoundary(
EncodedValue( _ENCODEDVALUE_CLOSE_TO_NULL_ )
);
}
// Note that since we don't have any boundary-inclusiveness information,
// we'll just pretend that none exists; arbitrarily, we decide to make
// the flag be FALSE.
// The effect of this :
//
// < < < <
// | | | |
// | | | |
// 1 2 3 4
//
// This histogram represents a table with 3 partitions. The SQL
// partitions are specified at values 2 and 3. By having the flags be
// NOT-BOUNDARY-INCLUSIVE (FALSE), we are specifying that the first
// partition has values from 1 (including 1) to 2 (not including 2); the
// second partition has values from 2 (including 2) to 3 (not including
// 3); similarly, the third partition goes from 3 (including 3) to 4
// (not including 4).
//
// If this interpretation of the SQL semantics is not correct,
// please let me know!
// now get the Histogram for the CSD matching the part. key we're using
ColStatsSharedPtr sourceColStats =
getColStatsPtrForColumn( keyCorrespondingToOutputRows );
if ( sourceColStats == NULL ||
sourceColStats->getHistogram()->entries() == 0 )
return FALSE; // couldn't find it! quit!
// at this point, we've found the colstats corresponding to the
// partitioning key that we'll be using; and we have the corresponding
// list of boundary values for that key; now we can generate the desired
// information
// ----------------------------------------------------------------------
// Before we proceed, however, we need to make sure that there aren't
// any contiguous partition boundaries' values which are equal.
// Histogram semantics does not allow for this, so we have to fudge this
// (possible, legitimate partition boundary value) case with a second
// variable, which keeps track of the number of partitions per histogram
// interval. Before we code this, here's an example which should help
// explain what we're trying to accomplish:
//
// Consider the following partition boundary intervals
//
// | | | | | | | | | | |
// M 3 3 3 3 5 5 7 7 7 M
// I A
// N X
//
// We want to compress this to :
//
// 0 1 4 2 3
// - - - - -
// | | | | |
// M 3 5 7 M
// I A
// N X
//
// (where the number across the top indicate the # of partitions that
// have values for that interval)
//
// Interpreting the condensed, internal histogram:
//
// interval 1: [MIN,3) : 1 partition
// interval 2: [3,5) : 4 partitions
// interval 3: [5,7) : 2 partitions
// interval 4: [7,MAX) : 3 partitions
//
// To create the mapping (the numbers indicating how many partitions
// have a particular boundary value), we simply count all of the
// HistInts which have a particular boundary value, then add a "0" to
// the beginning of our list of integers. Note that we don't count for
// the last partition boundary value (MAX).
//
// For the example above, our list-of-ints looks like :
//
// step1: 1 4 2 3
// step2: 0 1 4 2 3
//
outputFactors.clear();
CollIndex countDuplicates = 0;
// we're counting the # of intervals that have the same boundary value
for ( i = 0; i < targetHist->entries()-1; /* no automatic increment */ )
{
if ( (*targetHist)[i].getBoundary() == (*targetHist)[i+1].getBoundary() )
{
countDuplicates++;
targetHist->removeAt( i+1 ); // in-list removal : no increment of 'i'
}
else
{
outputFactors.insertAt( i, 1 + countDuplicates );
countDuplicates = 0; // reset
i++; // go to next interval
}
}
outputFactors.insertAt( 0, 0 ); // first HistInt always contains garbage info
// Are we done? Have we inserted the last number? This is needed to
// indicate the number of partitions which have the 2nd-to-last
// partition boundary value (we don't count for MAX)
if ( outputFactors.entries() < targetHist->entries() )
outputFactors.insertAt( targetHist->entries()-1, 1 ); // last boundary should be "MAX"
if (outputFactors.entries() != targetHist->entries() )
{
CCMPASSERT( outputFactors.entries() == targetHist->entries() ); // sanity check
// histogram-to-partition-boundary-list mapping failed. Partition is unusable
return FALSE;
}
if (outputFactors.entries() <= 0 )
{
CCMPASSERT( outputFactors.entries() > 0 );
// histogram-to-partition-boundary-list mapping failed. Partition is unusable
return FALSE;
}
if ( targetHist->entries() == 1 )
{
// unusual, but possible (is it?!) case : all partition boundary values are equal, ugh
// --> if this happens, then make it a 2-HistInt histogram so we can at least
// call the routines below
const EncodedValue & bound = (*targetHist)[0].getBoundary();
targetHist->clear();
targetHist->insertZeroInterval( bound, bound, TRUE );
outputFactors.insert( countDuplicates + 1 );
}
// ----------------------------------------------------------------------
// ----------------------------------------------------------------------
// less work for this special case: zero rows in sourceColStats
if ( sourceColStats->getRowcount().isZero() )
{
outputRows.setHistogram( new HISTHEAP Histogram( *targetHist, HISTHEAP ) );
outputRows.setMaxMinValuesFromHistogram();
outputRows.setRowsAndUec( csZero, csZero );
return TRUE ;
}
// ----------------------------------------------------------------------
// this histogram is the source of the rows that we'll be placing in targetHist
HistogramSharedPtr sourceHist = sourceColStats->getHistogram();
// create the template histogram, then populate it
HistogramSharedPtr templateHist =
sourceHist->createMergeTemplate( targetHist, FALSE );
ColStats templateStats( templateHist, HISTHEAP );
CollIndex templateEntries = templateHist->entries();
templateStats.populateTemplate( sourceColStats );
if ( templateHist->entries() != templateEntries )
return FALSE; // something went very wrong!
// now, "squeeze" the resulting histogram so that it only keeps the
// interval boundaries from the targetHistogram
NABoolean result =
templateHist->condenseToPartitionBoundaries( targetHist );
if ( result != TRUE )
return result; // something went wrong!
// now, return the resulting Histogram in a format that we can use
outputRows.setHistogram( templateHist );
outputRows.setMaxMinValuesFromHistogram();
outputRows.setRowsAndUecFromHistogram();
return TRUE;
// that's all folks!
}
// compress all histograms in the list to a single interval histogram
void
ColStatDescList::compressColStatsToSingleInt()
{
for (CollIndex i = 0; i < entries(); i++)
{
ColStatsSharedPtr colStat = (*this)[i]->getColStatsToModify();
if (!colStat->isVirtualColForHist() && !colStat->isOrigFakeHist())
colStat->compressToSingleInt();
}
}
// ----------------------------------------------------------------------
// ColStatDescList::mergeSpecifiedStatDescs
// A utility routine used to merge specific ColStatDesc's together.
// ----------------------------------------------------------------------
void
ColStatDescList::mergeSpecifiedStatDescs (const CollIndexList & statsToMerge,
CollIndex rootIndex,
MergeType mergeMethod,
CollIndex numOuterColStats,
CostScalar & newRowcount,
CostScalar & newUec,
NABoolean forVEGPred,
OperatorTypeEnum opType)
{
ColStatDescSharedPtr rootDesc = (*this)[rootIndex];
CostScalar saveRowcount = newRowcount;
CostScalar saveUec = newUec;
for ( CollIndex i = 0; i < statsToMerge.entries(); i++ )
{
if ( statsToMerge[i] != rootIndex )
{
// If the statistics to be merged are from opposite sides of the
// numOuterColStats boundary, then we are doing a
// left_table_column = right_table_column
// merge that should be done as the caller requested.
// Otherwise, we're merging columns from the same table, and
// selectivity for that is HIST_NO_STATS_UEC.
MergeType localMergeMethod = mergeMethod;
NABoolean joinOnOneTable = FALSE;
ColStatDescSharedPtr tmpDesc(new (HISTHEAP)
ColStatDesc( *((*this)[statsToMerge[i]]) ), HISTHEAP);
if ( NOT ( rootIndex < numOuterColStats &&
statsToMerge[i] >= numOuterColStats ) )
{
// the localMergeMethod may never be used, but we initialize it
// here for cases when COMP_BOOL_74 is OFF
localMergeMethod = INNER_JOIN_MERGE;
joinOnOneTable = rootDesc->mergeColStatDescOfSameTable(tmpDesc, opType);
}
if (!joinOnOneTable)
{
rootDesc->mergeColStatDesc(tmpDesc,
localMergeMethod,
FALSE, // don't force merge
opType
);
}
// get the aggregate results following the latest merge
ColStatsSharedPtr colStats = rootDesc->getColStats();
newRowcount = colStats->getRowcount();
newUec = colStats->getTotalUec();
// update the 'saved' information
saveRowcount = newRowcount;
saveUec = newUec;
// if this isn't a VEG, then it's an equality predicate.
// update the info used to support transitive closure for
// non-VEG equality predicates.
if ( NOT forVEGPred )
{
tmpDesc->setFromInnerTable(
statsToMerge[i] >= numOuterColStats ? TRUE : FALSE
);
rootDesc->nonVegEquals().insert( tmpDesc );
}
}
}
} // ColStatDescList::mergeSpecifiedStatDescs
// -----------------------------------------------------------------------
// Input: inputColumn, the column for which we need the ColStats
// output: TRUE if there was a ColStatDesc for "inputColumn" in this
// ColStatDescList, in which case its index is returned
// in "index". FALSE otherwise.
// -----------------------------------------------------------------------
//
// Logic:
//
// A ColStatDescList contains a list of ColStatDesc in no particular
// order. Every ColStatDesc represents a facade to a ColStats, i.e. to a
// histogram. Sometimes, the histogram is fake but the ColStatDesc
// encapsulates this fact. Every ColStatDesc has important fields that
// describe which column it applies to:
//
// mergeState_: a ValueIdSet, describing which histograms have been merged
// with it. Each element in the set contains the base column of the
// histogram that has been merged with the current one.
//
// -----------------------------------------------------------------------
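// For example (hypothetical columns): after applying the join predicate
// T1.a = T2.b, the ColStatDesc originally built for T1.a has
// mergeState_ = {T1.a, T2.b}, so a lookup of either column finds it.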
NABoolean
ColStatDescList::getColStatDescIndexForColumn (
CollIndex& index, /* out */
const ValueId& inputColumn /* in */
) const
{
// ----------------------------------------------------------------
// Find the base column for the input column:
// ----------------------------------------------------------------
const ItemExpr *inputColumnIEPtr = inputColumn.getItemExpr();
ValueId baseColumnForInputColumn;
ValueId vegColumnForInputColumn;
switch ( inputColumnIEPtr->getOperatorType() )
{
case ITM_VEG_REFERENCE:
{
// -------------------------------------------------
// The inputColumn is a VEG reference
// Loop through the columns until you find it:
// -------------------------------------------------
const VEG * exprVEG = ((VEGReference *)inputColumnIEPtr)->getVEG();
const ValueIdSet & VEGGroup = exprVEG->getAllValues();
if(exprVEG->seenBefore())
return FALSE;
else
{
exprVEG->markAsSeenBefore();
for ( ValueId id = VEGGroup.init();
VEGGroup.next( id );
VEGGroup.advance( id ) )
{
if ( getColStatDescIndexForColumn( index, id ) )
{
exprVEG->markAsNotSeenBefore();
return TRUE;
}
}
// If we are here, then we did not find the column
exprVEG->markAsNotSeenBefore();
return FALSE;
}
}
break;
case ITM_INSTANTIATE_NULL:
case ITM_BASECOLUMN:
// -------------------------------------------------
// The inputColumn is a base column.
// -------------------------------------------------
baseColumnForInputColumn = inputColumn;
break;
case ITM_INDEXCOLUMN:
// -------------------------------------------------
// Get the base column for the index column:
// -------------------------------------------------
{
const BaseColumn *bcIEPtrForIndexColumn =
(BaseColumn *) ((IndexColumn *) inputColumnIEPtr)->
getDefinition().getItemExpr();
baseColumnForInputColumn = bcIEPtrForIndexColumn->getValueId();
break;
}
default:
// give it a last shot to see if a histogram exists for the expression
// we might be looking for. For example, we could have an aggregate
// in a VEG, and we may also have a histogram for that. Sol:10-090110-9758
vegColumnForInputColumn = inputColumn;
} // switch on type of input column
// -----------------------------------------------------------------------
// Now, traverse this list and check whether there is a ColStatDesc
// for the given inputColumn, if so save its index in "index"
// -----------------------------------------------------------------------
NABoolean foundInBT = FALSE; // assume no ColStatDesc for this column
for ( CollIndex i = 0; i < entries(); i++ )
{
// Obtain the info for the current ColStatDesc:
ColStatDescSharedPtr currentColStatDesc = (*this)[i];
// The "merge state" of a ColStatDesc indicates all the
// columns that have been merged into this ColStatDesc.
// Initially, the merge state consists of the original
// base table column, therefore, this will work even for
// ColStatDesc's that have not been merged
const ValueIdSet & msSet = currentColStatDesc->getMergeState();
if ( ( msSet.contains( baseColumnForInputColumn ) ) ||
( currentColStatDesc->VEGColumn() == vegColumnForInputColumn) ||
( currentColStatDesc->getColumn() == baseColumnForInputColumn ) )
{ // found!
index = i;
foundInBT = TRUE;
break;
}
}
return foundInBT;
} // getColStatDescIndexForColumn(CollIndex & index, const ValueId column) const
// -----------------------------------------------------------------------
// Input: inputColumn, the column for which we need the ColStats
// partKeyColArray, columns for which we need the ColStats
// output: TRUE if there was a ColStatDesc for "partKeyColArray or inputColumn"
// in this ColStatDescList, in which case its index is returned
// in "index". FALSE otherwise.
// -----------------------------------------------------------------------
NABoolean
ColStatDescList::getColStatDescIndexForColumn (
CollIndex& index, /* out */
const ValueId& inputColumn, /* in */
NAColumnArray& partKeyColArray /* in */
) const
{
// single column partitioned table
if (partKeyColArray.entries() <= 1)
return (getColStatDescIndexForColumn(index, inputColumn));
// if multi-column partition key, find an MC stats entry whose columns are
// a prefix of the partition column list
for ( CollIndex i = 0; i < entries(); i++ )
{
if (((*this)[i]->getColStats()->isMCforHbasePartitioning()) &&
(partKeyColArray.entries() >= (*this)[i]->getColStats()->getStatColumns().entries()))
{
index = i;
return TRUE;
}
}
return FALSE;
}
// Get the index of the ColStatDesc for a particular valueId.
// The match is based on the VEGColumn field.
NABoolean
ColStatDescList::getColStatDescIndex (CollIndex& index, /* out */
const ValueId& value) const /* in */
{
for ( CollIndex i = 0; i < entries(); i++ )
{
// Obtain the info for the current ColStatDesc:
ColStatDescSharedPtr currentColStatDesc = (*this)[i];
const ValueId & veg = currentColStatDesc->getVEGColumn();
if ( veg == value )
{ // found!
index = i;
return TRUE;
}
}
return FALSE;
}
// -----------------------------------------------------------------------
// This routine returns the ColStats (i.e. the histogram) that corresponds
// to the given inputColumn.
//
// Input:
// ======
// const ValueId& inputColumn : a ValueId denoting the wanted histogram
//
// Output:
// =======
// A NON-NULL ColStatsSharedPtr for the histogram that describes the given
// inputColumn (if such histogram exists)
//
// NULL if a histogram for the given inputColumn does not exist in the
// ColStatDescList
// -----------------------------------------------------------------------
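// For example (hypothetical predicates): for a VEGPredicate representing
// "T1.a = T2.b" (arity 0), we search the VEG group {T1.a, T2.b} via
// getColStatsPtrForVEGGroup(); for a binary predicate such as
// "T1.a > 10", we descend into the children, trying the left child
// before the right, and return the first histogram found.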
ColStatsSharedPtr
ColStatDescList::getColStatsPtrForColumn (const ValueId& inputColumn) const
{
ColStatsSharedPtr colStatsPtr;
CollIndex index = 0;
NABoolean found = getColStatDescIndexForColumn( index, inputColumn );
if ( NOT found )
{
return NULL;
}
else
{
if ((index < 0) || (index >= entries()) )
{
// if the index is outside the range of the histogram list, return
// NULL pointer indicating that the histogram is not found in the
// list
CCMPASSERT( (index >= 0) AND (index < entries() ));
return NULL;
}
return (*this)[index]->getColStats();
}
}
// -----------------------------------------------------------------------
// This method returns the ColStatsSharedPtr for the ColStats that references
// the given predicate if it exists, otherwise it returns NULL
// -----------------------------------------------------------------------
ColStatsSharedPtr
ColStatDescList::getColStatsPtrForPredicate (const ValueId& predicate) const
{
ColStatsSharedPtr colStatsPtr;
const ItemExpr *predIE = predicate.getItemExpr();
if (!predIE->isAPredicate())
return NULL;
const Int32 arity = predIE->getArity();
switch ( arity )
{
case 3:
case 2:
{
// a join predicate of the form col1 op col2 or similar
ItemExpr *leftExpr = predIE->child(0);
ItemExpr *rightExpr = predIE->child(1);
const ValueId &leftChildVid = leftExpr->getValueId();
const ValueId &rightChildVid = rightExpr->getValueId();
// ------------------------------------------------------------------
// Process left child:
// ------------------------------------------------------------------
if ( leftExpr->isAPredicate() )
{
colStatsPtr = getColStatsPtrForPredicate( leftChildVid );
}
else if ( leftExpr->getOperatorType() == ITM_VEG_REFERENCE )
{
ValueIdSet vidSet;
vidSet.insert(
((VEGReference *)leftExpr)->getVEG()->getAllValues() );
colStatsPtr = getColStatsPtrForVEGGroup( vidSet );
}
else
{
// It is neither a predicate nor a reference, so it
// must be an expression; get the columns it refers
// to
leftExpr = leftExpr->getLeafValueIfUseStats();
const ValueId &leftVid = leftExpr->getValueId();
colStatsPtr = getColStatsPtrForColumn( leftVid );
}
if ( colStatsPtr == NULL )
{
// ---------------------------------------------------------------
// ColStats not found in left child, try to find it in right:
// ---------------------------------------------------------------
if ( rightExpr->isAPredicate() )
{
colStatsPtr = getColStatsPtrForPredicate( rightChildVid );
}
else if ( rightExpr->getOperatorType() == ITM_VEG_REFERENCE )
{
ValueIdSet vidSet;
vidSet.insert(
((VEGReference *)rightExpr)->getVEG()->getAllValues() );
colStatsPtr = getColStatsPtrForVEGGroup( vidSet );
}
else
{
// It is neither a predicate nor a reference, so it
// must be an expression; get the columns it refers
// to
rightExpr = rightExpr->getLeafValueIfUseStats();
const ValueId &rightVid = rightExpr->getValueId();
colStatsPtr = getColStatsPtrForColumn( rightVid );
}
} // if colstats not found in left child
} // if arity is 2 or 3
break;
case 0:
{
if ( predIE->getOperatorType() == ITM_VEG_PREDICATE )
{
const ValueIdSet & vegGroup =
((VEGPredicate *)predIE)->getVEG()->getAllValues();
colStatsPtr = getColStatsPtrForVEGGroup( vegGroup );
}
else
{
CCMPASSERT( predIE->getOperatorType() == ITM_VEG_PREDICATE );
// add code to handle case here...
return NULL;
}
}
break;
default:
{
return NULL; // For unary logic predicates, there is nothing to return
}
} // case getArity()
return colStatsPtr;
}
ColStatsSharedPtr
ColStatDescList::getColStatsPtrForVEGGroup(const ValueIdSet& VEGGroup) const
{
ColStatsSharedPtr colStatsPtr = NULL;
// Get the first ColStats for any value that is a column:
ValueIdSet leafValuesForExpr;
for ( ValueId vid = VEGGroup.init();
VEGGroup.next( vid );
VEGGroup.advance( vid ) )
{
ItemExpr * vidIePtr = vid.getItemExpr();
switch ( vidIePtr->getOperatorType() )
{
case ITM_BASECOLUMN:
case ITM_INDEXCOLUMN:
{
colStatsPtr = getColStatsPtrForColumn( vid );
if ( colStatsPtr != NULL ) return colStatsPtr;
break;
}
case ITM_INSTANTIATE_NULL:
{
InstantiateNull *inst = (InstantiateNull *)vidIePtr->castToItemExpr();
if ( NOT inst->NoCheckforLeftToInnerJoin )
{
// if not a left join transformation
colStatsPtr = getColStatsPtrForColumn( vid );
if ( colStatsPtr != NULL ) return colStatsPtr;
break;
}
else
{
const ValueId & childVid = vidIePtr->child(0).getValueId();
const ItemExpr * childVidIePtr = childVid.getItemExpr();
if ( childVidIePtr->getOperatorType() == ITM_VEG_REFERENCE )
{
VEG *veg = ((VEGReference *)childVidIePtr)->getVEG();
if (veg->seenBefore())
break;
veg->markAsSeenBefore();
const ValueIdSet& vegGroup = veg->getAllValues();
colStatsPtr = getColStatsPtrForVEGGroup( vegGroup );
veg->markAsNotSeenBefore();
if ( colStatsPtr != NULL ) return colStatsPtr;
break;
}
else if ( childVidIePtr->getOperatorType() == ITM_BASECOLUMN
OR childVidIePtr->getOperatorType() == ITM_INDEXCOLUMN
)
{
colStatsPtr = getColStatsPtrForColumn( childVid );
if ( colStatsPtr != NULL ) return colStatsPtr;
break;
}
}
break;
}
case ITM_VEG_REFERENCE:
{
VEG *veg = ((VEGReference *)vidIePtr)->getVEG();
if (veg->seenBefore())
break;
veg->markAsSeenBefore();
// Get all members of VEGRef:
const ValueIdSet& vegGroup = veg->getAllValues();
colStatsPtr = getColStatsPtrForVEGGroup( vegGroup );
veg->markAsNotSeenBefore();
if ( colStatsPtr != NULL ) return colStatsPtr;
break;
}
case ITM_PI:
case ITM_CACHE_PARAM:
case ITM_CONSTANT:
case ITM_HOSTVAR:
case ITM_DYN_PARAM:
case ITM_CURRENT_USER:
case ITM_SESSION_USER:
case ITM_CURRENT_TIMESTAMP:
case ITM_UNIX_TIMESTAMP:
case ITM_UNIQUE_ID:
case ITM_GET_TRIGGERS_STATUS:
case ITM_UNIQUE_EXECUTE_ID:
continue;
default:
// couldn't find the histogram, continue to look for other
// values in a VEG group. Also collect the leaf values if
// it is an expression for which we can use histograms.
// These may be used later if we are unable to find any histograms
ItemExpr * leafValue = vidIePtr->getLeafValueIfUseStats();
if (leafValue != vidIePtr)
{
ValueIdSet lvSet;
if (leafValue->getOperatorType() == ITM_CASE)
{
leafValue->getLeafValueIdsForCaseExpr(lvSet);
}
else
leafValue->findAll(ITM_BASECOLUMN, lvSet, TRUE, TRUE);
leafValuesForExpr.addSet(lvSet);
}
continue;
} // end case
} // end for
// if we did not find any histograms so far, but there were some
// expressions in the VEG for which we can use stats, try those
if ((colStatsPtr == NULL) && (leafValuesForExpr.entries() > 0))
{
CollIndex idx;
if (getColStatDescIndexForColWithMaxUec(idx, leafValuesForExpr))
colStatsPtr = (*this)[idx]->getColStats();
}
return colStatsPtr; // NULL if no histogram was found
} // ColStatDescList::getColStatsPtrForVEGGroup(const ValueIdSet& VEGGroup)
CostScalar
ColStatDescList::getUecOfJoiningCols(ValueIdSet & joinedColSet) const
{
CostScalar minUec = COSTSCALAR_MAX;
CostScalar currUec = csOne;
for (ValueId vid = joinedColSet.init();
joinedColSet.next(vid);
joinedColSet.advance(vid) )
{
ColStatsSharedPtr colStats = this->getColStatsPtrForColumn(vid);
if (colStats)
{
currUec = colStats->getTotalUec();
if (currUec < minUec)
minUec = currUec;
} // if ColStats
} // end for joinedColSet
return minUec;
} // ColStatDescList::getUecOfJoiningCols
void
ColStatDescList::print (ValueIdList selectListCols,
FILE *ofd,
const char * prefix,
const char * suffix,
CollHeap *c, char *buf,
NABoolean hideDetail) const
{
NABoolean atLeastOnePrinted = FALSE;
NABoolean runningShowQueryStatsCmd = (selectListCols.entries() > 0);
Space * space = (Space *)c;
char mybuf[1000];
sprintf(mybuf,
"**************************************************************\n");
PRINTIT(ofd, c, space, buf, mybuf);
for (CollIndex colStatDescIndex=0;
colStatDescIndex < entries();
colStatDescIndex++)
{
ColStatDescSharedPtr statDesc = (*this)[colStatDescIndex];
if(runningShowQueryStatsCmd)
{
if(!selectListCols.contains(statDesc->getVEGColumn()) &&
(atLeastOnePrinted || colStatDescIndex < (entries()-1)))
continue;
}
if (atLeastOnePrinted)
{
sprintf(mybuf,
"-------------------------------------------------------\n") ;
PRINTIT(ofd, c, space, buf, mybuf);
}
else
atLeastOnePrinted = TRUE;
statDesc->print(ofd,prefix,suffix,c,buf,hideDetail);
}
sprintf(mybuf,
"**************************************************************\n");
PRINTIT(ofd, c, space, buf, mybuf);
}
void
ColStatDescList::display() const
{
ValueIdList emptySelectList;
print(emptySelectList);
}
void
ColStatDescList::verifyInternalConsistency(CollIndex start, CollIndex end) const
{
CCMPASSERT ( start <= end ); // misuse of function!
if ( start >= entries() ) return;
if ( end > entries() ) return;
const CostScalar & matchRowcount =
(*this)[start]->getColStats()->getRowcount();
CollIndex i = start + 1;
for ( ; i < end; i++ )
{
const CostScalar & rc = (*this)[i]->getColStats()->getRowcount();
if ( NOT ( rc == matchRowcount ) )
{
#ifdef MONITOR_SAMEROWCOUNT
this->display() ;
CCMPASSERT ( (*this)[i]->getColStats()->getRowcount() == matchRowcount );
#endif
break ;
}
}
// now handle the case of histograms with zero entries
for ( i = start; i < end; i++ )
{
if ( (*this)[i]->getColStats()->getHistogram()->entries() == 0 )
{
ColStatsSharedPtr stats = (*this)[i]->getColStatsToModify();
stats->setToSingleInterval (
stats->getMinValue(),
stats->getMaxValue(),
stats->getRowcount(),
stats->getTotalUec()
);
stats->setRedFactor ( csOne );
stats->setUecRedFactor ( csOne );
stats->setFakeHistogram();
}
}
}
// this function has a very simple algorithm:
// (1) if the histograms all have the same rowcount, done
// (2) otherwise, find the first that's not a fake histogram; set all to
// have its rowcount
// (3) otherwise, find average of all rowcounts, set all to this value
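// Illustration with hypothetical rowcounts {100 (fake), 500, 300 (fake)}:
// case (2) applies and all three histograms are synchronized to 500; if
// all three were fake, case (3) would set each of them to the average,
// (100 + 500 + 300) / 3 = 300.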
void
ColStatDescList::enforceInternalConsistency(CollIndex start,
CollIndex end,
NABoolean printNoStatsWarning)
{
CCMPASSERT ( start <= end ); // misuse of function!
if ( start >= entries() ) return;
if ( end > entries() ) return;
// first handle the case of histograms with zero entries
//
// --> At the same time, if param "printNoStatsWarning" is TRUE, then
// fire off a warning message for every fake Colstats that has the
// isUpStatsNeeded() flag set. Note that we leave this flag set in
// order to enable other we-need-stats code in other routines.
CollIndex i = start;
for (; i < end; i++ )
{
ColStatDescSharedPtr cdesc = (*this)[i];
ColStatsSharedPtr cs = cdesc->getColStats();
if (cs == NULL)
{
CCMPASSERT(cs != NULL);
continue;
}
if (cs->getHistogram() == NULL)
{
CCMPASSERT("Histogram is NULL");
cs->insertZeroInterval();
}
if ( cs->getHistogram()->entries() == 0 )
{
cs = (*this)[i]->getColStatsToModify() ; // why we did the const-cast
cs->setToSingleInterval (
cs->getMinValue(),
cs->getMaxValue(),
cs->getRowcount(),
cs->getTotalUec()
) ;
cs->setRedFactor ( csOne );
cs->setUecRedFactor ( csOne );
cs->setFakeHistogram();
}
// Warning 6008 was earlier given for all columns, even if they did
// not participate in the query. It is now given only for those columns
// whose histograms are needed.
// All missing-stats warnings are controlled by the CQD
// HIST_MISSING_STATS_WARNING_LEVEL; warnings are displayed only if the
// value of the CQD is greater than 0.
if(cs->getStatColumns().entries())
{
if ( printNoStatsWarning && cs->isUpStatsNeeded() )
{
if ( ( cs->isFakeHistogram() ) || (cs->isSmallSampleHistogram()) ) // if fake or small sample histogram, fire off the warning!
{
ValueId colId = cdesc->getColumn();
BaseColumn * colExpr = colId.castToBaseColumn();
if (colExpr != NULL)
{
// By this time we have ensured that it is a base column
TableDesc * tableDescForCol = colExpr->getTableDesc();
const MultiColumnUecList * ueclist = getUecList() ;
NABoolean quickStats = FALSE;
if (cs->isSmallSampleHistogram())
quickStats = TRUE;
ueclist->displayMissingStatsWarning(tableDescForCol,
colId,
TRUE,
TRUE,
*this,
csMinusOne, // displaying missing single column warnings
quickStats);
}
}
}
}
}
NABoolean allHaveSameRowcount = TRUE;
const CostScalar & matchRowcount =
(*this)[start]->getColStats()->getRowcount();
for ( i = start+1; i < end; i++ )
{
const CostScalar & rc = (*this)[i]->getColStats()->getRowcount();
if ( NOT ( rc == matchRowcount ) )
{
allHaveSameRowcount = FALSE;
break ;
}
}
if ( allHaveSameRowcount ) return; // CASE (1), done
// OK, they're inconsistent ... rectifying ...
CollIndex firstNonFake = NULL_COLL_INDEX;
CostScalar firstNonFakeRowcount = csMinusOne;
for ( i = start; i < end; i++ )
{
if ( NOT (*this)[i]->getColStats()->isFakeHistogram() )
{
firstNonFake = i;
firstNonFakeRowcount = (*this)[i]->getColStats()->getRowcount();
break;
}
}
if ( firstNonFake != NULL_COLL_INDEX ) // CASE (2)
{
if (firstNonFakeRowcount < 0)
{
CCMPASSERT( firstNonFakeRowcount.isGreaterOrEqualThanZero() );
firstNonFakeRowcount = 0;
}
for ( i = start; i < end; i++ )
{
const CostScalar & rc = (*this)[i]->getColStats()->getRowcount();
(*this)[i]->synchronizeStats( rc, firstNonFakeRowcount );
}
return ; // done
}
// OK, they're all fake histograms ... can't do much better than averaging them ... sigh
// CASE (3)
CostScalar sumRC = csZero;
for ( i = start; i < end; i++ )
{
sumRC += (*this)[i]->getColStats()->getRowcount();
}
CostScalar avgRC = sumRC / ( end - start );
for ( i = start; i < end; i++ )
{
const CostScalar & rc = (*this)[i]->getColStats()->getRowcount();
(*this)[i]->synchronizeStats( rc, avgRC );
}
// done, finally
}
ValueIdSet ColStatDescList::appliedPreds () const
{
ValueIdSet result;
for ( CollIndex i = 0; i < entries(); i++ )
{
result += (*this)[i]->appliedPreds();
}
return result;
}
ValueIdSet ColStatDescList::VEGColumns () const
{
ValueIdSet result;
for ( CollIndex i = 0; i < entries(); i++ )
{
result += (*this)[i]->VEGColumn();
}
return result;
}
// -----------------------------------------------------------------------
// methods on MultiColumnUecList class
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// MultiColumnUecList::HashFunction
//
// we need some sort of hashing function in order to use NAHASHDICTIONARY;
// this is a first effort, obviously fairly naive and unsophisticated.
// -----------------------------------------------------------------------
ULng32
MultiColumnUecList::HashFunction (const ValueIdSet & input)
{
ULng32 retval = 1 + input.entries();
for ( ValueId id = input.init(); input.next(id); input.advance(id) )
retval += (CollIndex) id; // add up the ValueId's
return retval ;
}
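// For example: a 2-element set whose ValueIds are 5 and 9 hashes to
// 1 + 2 + 5 + 9 = 17 (hypothetical ValueIds; collisions are possible
// but harmless, since the dictionary resolves them).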
// default constructor
MultiColumnUecList::MultiColumnUecList () :
HASHDICTIONARY(ValueIdSet,CostScalar) (&(MultiColumnUecList::HashFunction),
17, // original hash size ... why not?
TRUE, // uniqueness constraint
HISTHEAP )
{ };
// -----------------------------------------------------------------------
// MultiColumnUecList :: ctor
//
// builds the MultiColumnUecList from the initial StatsList object
//
// does the NAColumn -> ValueId conversion found in the ColStatDesc ctor
//
// this function is only called from TableDesc::getTableColStats()
// -----------------------------------------------------------------------
MultiColumnUecList::MultiColumnUecList (const StatsList & initStats,
const ValueIdList & tableColumns) :
HASHDICTIONARY(ValueIdSet,CostScalar) (&(MultiColumnUecList::HashFunction),
17, // original hash size ... why not?
TRUE, // uniqueness constraint
HISTHEAP )
{
// the StatsList has two lists which it uses to store the information we
// need to fill the MultiColumnUecList with <table-col-list,uec value> pairs:
//
// LIST(NAColumnArray) groupUecColumns_
// LIST(CostScalar) groupUecValues_
CostScalar rowCount = initStats[0]->getRowcount();
// loop through the list of NAColumnArray's
for ( CollIndex i = 0; i < initStats.groupUecColumns_.entries(); i++ )
{
const NAColumnArray & uecCols = initStats.groupUecColumns_[i];
ValueIdSet insertCols;
// multiColUec
CostScalar multiColUec(initStats.groupUecValues_[i]);
// Upper limit to the multi column UEC
CostScalar maxMCUec = csOne;
// lower limit to the multi-col UEC
CostScalar minMCUec = csOne;
// would be set to TRUE if any histogram is missing statistics
NABoolean statsMissing = FALSE;
// for each NAColumnArray, map each NAColumn to a ValueId
for ( CollIndex j = 0; j < uecCols.entries(); j++ )
{
Lng32 position = uecCols[j]->getPosition();
const ValueId & id = tableColumns[position];
CostScalar singleColUec = initStats.getSingleColumnUECCount(position);
if (singleColUec < 1)
statsMissing = TRUE;
//MCUec cannot be less than the UEC count of any of the columns that
//are included in the MCHistogram.
minMCUec = MAXOF(minMCUec, singleColUec);
maxMCUec *= singleColUec;
insertCols.insert( id );
}
// The product of the single-column UECs could be negative if it involves
// a column that is not referenced in the query. In such cases, we use
// the rowcount as the upper limit
if (statsMissing)
maxMCUec = rowCount;
else
maxMCUec = MINOF(maxMCUec, rowCount);
// multi-col UEC should not exceed the product of the single-col UECs
// or the row count, whichever is smaller.
// And multi-col UEC should be at least equal to the max single-column
// UEC of the columns participating in the multi-col group.
if (multiColUec < minMCUec)
multiColUec = minMCUec;
if (multiColUec > maxMCUec)
multiColUec = maxMCUec;
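// Illustration with hypothetical numbers: columns (a, b) with
// single-column UECs 100 and 40 and rowCount = 2000 give
// minMCUec = 100 and maxMCUec = MINOF(100 * 40, 2000) = 2000; a stored
// group uec of 50 would be raised to 100, and one of 5000 would be
// clamped to 2000.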
// now we've converted each NAColumn->ValueId in a particular
// NAColumnArray; now insert this list and its corresponding uec
insertPair( insertCols, multiColUec );
}
}
// -----------------------------------------------------------------------
// MultiColumnUecList::insertList
//
// inserts all entries from OTHER into THIS (unless a particular entry
// already exists in THIS)
// -----------------------------------------------------------------------
void
MultiColumnUecList::insertList (const MultiColumnUecList * other)
{
if ( other == NULL ) return;
if ( other == this ) return;
if ( other->entries() == 0 ) return;
ValueIdSet * keyEntry = NULL;
CostScalar * uecEntry = NULL;
MultiColumnUecListIterator iter( *other );
iter.getNext( keyEntry, uecEntry );
while ( keyEntry != NULL && uecEntry != NULL )
{
if ( NOT contains( keyEntry ) )
insertPair( *keyEntry, *uecEntry );
iter.getNext( keyEntry, uecEntry );
}
}
// -----------------------------------------------------------------------
// MultiColumnUecList::insertMappedList
//
// inserts all entries from OTHER into THIS, after mapping the ValueIds
// in the list, using MAP. Note that we do the mapping in the "up"
// direction.
// -----------------------------------------------------------------------
void
MultiColumnUecList::insertMappedList(const MultiColumnUecList *other,
const ValueIdMap &map)
{
if ( other == NULL ) return;
if ( other->entries() == 0 ) return;
ValueIdSet * keyEntry = NULL;
CostScalar * uecEntry = NULL;
MultiColumnUecListIterator iter( *other );
iter.getNext( keyEntry, uecEntry );
while ( keyEntry != NULL && uecEntry != NULL )
{
ValueIdSet *mappedSet = new(HISTHEAP) ValueIdSet;
map.mapValueIdSetUp(*mappedSet, *keyEntry);
// Todo: CSE: This is unlikely to work, since the stats will be
// expressed in BaseColumns, while the map contains VEGRefs.
// Uncomment the assert below and run compGeneral/TEST045
// to see the problem.
// DCMPASSERT(*mappedSet != *keyEntry);
if ( NOT contains( mappedSet ) )
insertPair( *mappedSet, *uecEntry );
iter.getNext( keyEntry, uecEntry );
}
}
// -----------------------------------------------------------------------
// MultiColumnUecList::insertPair
//
// inserts a <table-column-valueidset, groupUec> pair
// -----------------------------------------------------------------------
NABoolean
MultiColumnUecList::insertPair (const ValueIdSet & columns,
const CostScalar & groupUec)
{
ValueIdSet * columnsCopy = new (HISTHEAP) ValueIdSet( columns );
CostScalar * groupUecCopy = new (HISTHEAP) CostScalar( groupUec );
if ( columnsCopy ==
(NAHashDictionary<ValueIdSet,CostScalar>::insert( columnsCopy, groupUecCopy )) )
return TRUE; // insert successful
else
return FALSE; // insert failed
}
// -----------------------------------------------------------------------
// MultiColumnUecList::updatePair
//
// updates the groupUec of <table-column-valueidset, groupUec> pair
// -----------------------------------------------------------------------
NABoolean
MultiColumnUecList::updatePair (const ValueIdSet & columns,
const CostScalar & groupUec)
{
ValueIdSet * columnsCopy = new (HISTHEAP) ValueIdSet( columns );
if ( columnsCopy ==
(NAHashDictionary<ValueIdSet,CostScalar>::remove( columnsCopy)) )
{
if ( insertPair( columns, groupUec ) )
return TRUE; // update successful
}
return FALSE; // update failed
}
// -----------------------------------------------------------------------
// MultiColumnUecList::lookup
//
// given a <table-column-valueidset>, returns the corresponding group uec
// value
// -----------------------------------------------------------------------
CostScalar
MultiColumnUecList::lookup (const ValueIdSet & key) const
{
CostScalar groupUec = csMinusOne;
if ( contains( &key ) )
groupUec = *(getFirstValue( &key ) );
return groupUec;
}
// ------------------------------------------------------------------------------
// This method sets the multi-column UEC for unique indexes equal to the
// row count. If the multi-column statistics for a unique index do not
// exist, they are created by setting the column list equal to the column
// list from the index, and the uec equal to the base row count of the table.
// ------------------------------------------------------------------------------
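// For example (hypothetical index): for a unique index on (a, b) and a
// table rowcount of 10,000, the pair <{a, b}, 10000> is inserted (or an
// existing group uec is updated to 10,000), since a unique key has
// exactly one row per distinct column combination.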
void MultiColumnUecList::initializeMCUecForUniqueIndxes(TableDesc &table,
const CostScalar & tableRowcount)
{
const NAFileSetList indexList = table.getNATable()->getIndexList();
const ValueIdList &tableColumns = table.getColumnList();
for (CollIndex listi = 0; listi < indexList.entries(); listi++)
{
if (indexList[listi]->uniqueIndex())
{
const NAColumnArray & uecCols = indexList[listi]->getIndexKeyColumns();
ValueIdSet insertCols;
for (CollIndex j = 0; j < uecCols.entries(); j++)
{
Lng32 position = uecCols[j]->getPosition();
const ValueId & id = tableColumns[position];
insertCols.insert(id);
} // for all columns in the index
// see if the multi-column statistics exist for the given set of columns
CostScalar multUecRowCount = -1;
if ((multUecRowCount = lookup(insertCols)) > 0)
{
// if the row count from the statistics is not equal to the table rowcount, update it
if (multUecRowCount != tableRowcount)
updatePair(insertCols,tableRowcount);
}
else
insertPair(insertCols,tableRowcount);
} // if the index is unique
} // for all indexes of the table
return;
} // initializeMCUecForUniqueIndxes
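// Worked example (hypothetical table): a table with 100,000 rows and a
// unique index on (a,b). Every (a,b) combination appears at most once, so
// the multi-column UEC of {a,b} must equal the row count; this method
// therefore inserts (or corrects) the entry <{a,b}, 100000> in this list.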
//---------------------------------------------------------------------
//MultiColumnUecList::getListOfSubsetsContainsColumns
//
//Input: list of columns
//Output: list of ValueIdSets, each of which contains the last column in
//the list plus other columns drawn only from the columnList
//Constraints: the ValueIds passed in can be VEGRefs that contain the
// base id for the column at the first level, or the id
// corresponding to an index on the table.
//---------------------------------------------------------------------
LIST(ValueIdSet) *
MultiColumnUecList::getListOfSubsetsContainsColumns(
const ValueIdList & columns/*in*/,
LIST(CostScalar)& uecCount/*out*/
) const
{
LIST(ValueIdSet) * result = new (HISTHEAP) LIST( ValueIdSet )(HISTHEAP);
ValueIdSet allValueIds;
ValueIdSet colValueIds;
CollIndex members = columns.entries();
ValueId column;
//Below we collect all the ValueIds that each input column can be
//associated with, so that we don't miss any matches when we
//cross-reference the multi-column UEC list. We build two sets:
//colValueIds -- all ValueIds of the last/main column
//allValueIds -- all ValueIds of the rest of the columns
for(CollIndex i=0; i<members;i++)
{
column = columns[i];
const ItemExpr * itemExprForCol = column.getItemExpr();
if( itemExprForCol->getOperatorType() == ITM_VEG_REFERENCE )
{
if(i==members-1)
colValueIds = ((VEGReference *)itemExprForCol)->getVEG()->getAllValues();
else
allValueIds += ((VEGReference *)itemExprForCol)->getVEG()->getAllValues();
}
else if ( itemExprForCol->getOperatorType() == ITM_INDEXCOLUMN )
{
const BaseColumn * bc;
bc = (BaseColumn *)((IndexColumn *)itemExprForCol)->getDefinition().getItemExpr();
if(i==members-1)
{
colValueIds.insert( bc->getEIC() );
//inserts the id this column has on base table
colValueIds.insert( bc->getValueId() );
}
else
{
//inserts other id's this column might have on other indexes
allValueIds.insert( bc->getEIC() );
//inserts the id this column has on base table
allValueIds.insert( bc->getValueId() );
}
}
//for the case when id is the id in the base table
//and for extra precaution
if(i==members-1)
colValueIds.insert(column);
else
allValueIds.insert( column );
}
#ifndef NDEBUG
if(getenv("MDAM_MCUEC"))
{
fprintf(stdout, " \n\n-----List to be considered----\n");
allValueIds.print();
}
#endif
MultiColumnUecListIterator iter(*this);
ValueIdSet * keyEntry = NULL;
CostScalar * uecEntry = NULL;
CollIndex position = 0; // position to enter in the list
// Below we traverse the multi-column UEC list. For each keyEntry we
//check whether it contains the main column, and then whether it contains
//any column other than the ones passed in; if it doesn't, it is a
//viable entry.
//Ex. if a,b,c,d are the columns passed in, we first look for
// d in the multi-column UEC entry; if the entry contains d, we then
// look for a,b,c, and if the entry contains any or all of them but
// no other column such as 'e' or 'f', then it is a viable
// multi-column histogram.
for ( iter.getNext( keyEntry, uecEntry );
keyEntry != NULL && uecEntry != NULL;
iter.getNext( keyEntry, uecEntry ) )
{
ValueIdSet tempSet = *keyEntry;
tempSet.removeCoveredVIdSet(colValueIds);
if(tempSet.entries() < keyEntry->entries())
{
tempSet.removeCoveredVIdSet(allValueIds);
if(tempSet.entries()==0)
{
result->insertAt( position, *keyEntry );
uecCount.insertAt( position, *uecEntry );
position++;
}
}
}
return result;
}
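// Illustrative example (hypothetical columns and entries): for input
// columns (a,b,c,d), an MC entry {c,d} is viable -- it contains the last
// column d and otherwise only input columns; {b,d,e} is not viable (it
// contains the foreign column e); {a,b,c} is not viable (it lacks d).
// Viable entries are appended to the result, with their group UECs placed
// at the same positions in uecCount.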
// -----------------------------------------------------------------------
// MultiColumnUecList::largestSubset
//
//Input: ValueIdSet (columns) of column ValueIds
//Output: returns the ValueIdSet that matches the most ValueIds from
//(columns) but does not contain any other ValueIds.
//Ex: for ValueIdSet (1, 4, 6), if we had multi-column UECs for
// (1,4,6,8), (1,4,7) and (1), we would return (1), because the other
// two contain unmentioned columns.
// In case of a tie between two entries we select the one with the
// largest correlation.
//Constraints: the ValueIdSet passed in has to contain base table ids,
//because multi-column UECs are stored using base table ids.
// -----------------------------------------------------------------------
ValueIdSet
MultiColumnUecList::largestSubset (const ValueIdSet & columns) const
{
ValueIdSet largestSet; // the largest subset found thus far
// This is the smallest independence factor seen so far; it identifies the
// largest correlation between the columns. The smaller the IP factor,
// the larger the correlation between the columns.
double smallestIPFactor = 1.0;
if ( entries() == 0 OR columns.entries() == 0 )
return largestSet; // no subsets at all!
ValueIdSet * keyEntry = NULL;
CostScalar * uecEntry = NULL;
// we need to iterate through all entries in this list
MultiColumnUecListIterator iter( *this );
for ( iter.getNext( keyEntry, uecEntry );
keyEntry != NULL && uecEntry != NULL;
iter.getNext( keyEntry, uecEntry ) )
{
ValueIdSet tempSet = *keyEntry;
//we do not want extra columns so after we remove the
//columns asked for we should have 0 entries left.
tempSet.removeCoveredVIdSet( columns );
if( tempSet.entries() > 0 )
continue;
// want to find the entry in the MultiColumnUecList which contains
// group uec information about the most columns.
// We select this entry if it has more matching columns, or, if it
// has the same number of columns, if it has the smaller
// independence factor (i.e., the larger correlation)
if ( keyEntry->entries() >= largestSet.entries() )
{
double independenceFactor = 0.0;
double SCproductUec = 1.0;
for (ValueId keyCol = keyEntry->init();
keyEntry->next(keyCol);
keyEntry->advance(keyCol) )
SCproductUec *= lookup(keyCol).value();
independenceFactor = uecEntry->value()/SCproductUec;
if ( ( keyEntry->entries() > largestSet.entries() )
OR
( (keyEntry->entries() == largestSet.entries())
AND (keyEntry->entries() > 0 )
AND ( independenceFactor < smallestIPFactor ) ) )
{
largestSet = *keyEntry;
smallestIPFactor = independenceFactor;
}
}
} // end for
return largestSet;
}
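// Worked example of the independence factor (hypothetical UECs): suppose
// UEC(a) = 10, UEC(b) = 20, and the group UEC of {a,b} is 50. Under full
// independence the group UEC would be 10 * 20 = 200, so
// independenceFactor = 50 / 200 = 0.25. The farther below 1.0 the factor
// falls, the more correlated the columns, and the more attractive the
// entry is as a tie-breaker between subsets of equal size.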
//---------------------------------------------------------------------
//MultiColumnUecList::findDenom
//
//Input: set of columns
//Output: Boolean. Returns TRUE if there is a multi-column histogram
//exactly matching the input, FALSE otherwise.
//Constraints: ValueIds for the columns need to be base ValueIds.
//---------------------------------------------------------------------
NABoolean
MultiColumnUecList::findDenom (const ValueIdSet & columns) const
{
if ( entries() == 0 OR columns.entries() == 0 )
return FALSE; // no subsets at all!
ValueIdSet * keyEntry = NULL;
CostScalar * uecEntry = NULL;
// we need to iterate through all entries in this list
MultiColumnUecListIterator iter( *this );
for ( iter.getNext( keyEntry, uecEntry );
keyEntry != NULL && uecEntry != NULL;
iter.getNext( keyEntry, uecEntry ) )
{
if(columns == *keyEntry)
{
return TRUE;
}
}
return FALSE;
}
// -----------------------------------------------------------------------
// MultiColumnUecList::useMCUecforCorrPreds
//
// used to calculate an adjustment in the case of multiple predicates being
// applied to highly correlated table columns
// (fn useMultiUecIfCorrelatedPreds(), subr of
// fn estimateCardinality() )
//
// given a list of <ValueId, CostScalar> pairs representing all of the
// histograms which have been reduced, and the amount (reduction factor)
// they've been reduced, return TRUE/FALSE if, in the list of these
// predicates, there are 2+ from the same table for which we have
// multi-column uec information and which are "highly correlated"
// (defined below).
//
// If both of these conditions are met, then we supply a factor
// "reductionAdjustment" which should be applied to the current rowcount
// estimate in order to increase it beyond its current value, to take
// into account the fact that we are applying multiple predicates to
// highly correlated columns, which we assume means that beyond the
// most selective predicate, the additional predicates are redundant
// in part or whole (i.e., they remove the "same rows" as the other
// predicates).
//
// We basically check whether the row reduction has taken the number of rows
// below the number of rows per multi-column UEC group. If so, the reduction
// adjustment readjusts the number of rows appropriately. This function
// never returns a reduction adjustment that would let the number of rows
// drop below one.
// Constraints: The valueIds in NAHashDictionary should be base table valueIds
// for those columns
// all tablePtrs should be 8-byte aligned, so divide by 8
// to get a better hash value
ULng32 TableDescHashFn (const TableDesc & tablePtr)
{ return (ULng32)((Long)&tablePtr/8) ; }
NABoolean
MultiColumnUecList::useMCUecForCorrPreds (
NAHashDictionary<ValueId, CostScalar> & predReductions, /* in/mod */
const CollIndex numPredicates, /* in */
const CostScalar& oldRowCount, /* in */
const CostScalar& newRowCount, /* in */
NABoolean largeTableNeedsStats,
const ColStatDescList & scHists,
CostScalar & reductionAdjustment) /* out */
{
#define USE_MULTI_COL_UEC_INFO
if ( numPredicates < 2 )
return FALSE;
// -----------------------------------------------------------------------
// First, we need to identify the table which has the most column
// references in our list of <pred column, rowcount reduction> pairs.
//
// We use a hash-dictionary to implement an association list of
// <TableDesc*, ValueIdList> pairs -- each ValueIdList indicates
// which ValueId's we have that touch the TableDesc* in question.
// -----------------------------------------------------------------------
NAHashDictionary<TableDesc,ValueIdList> tableColumns (
&TableDescHashFn, 7, TRUE, HISTHEAP );
// we need to iterate over the <ValueId, CostScalar> pairs in order
// to fill the NAHashDictionary above (tableColumns) with values
NAHashDictionaryIterator<ValueId, CostScalar> predIter( predReductions );
ValueId * predColumn = NULL;
CostScalar * reduction = NULL;
for ( predIter.getNext( predColumn, reduction );
predColumn != NULL && reduction != NULL;
predIter.getNext( predColumn, reduction ) )
{
const ValueId & iterId = *predColumn;
// initially assume ITM_BASETABLE
BaseColumn * iterExpr = iterId.castToBaseColumn();
if ( iterExpr == NULL ) return FALSE; // unexpected condition
TableDesc * iterDesc = iterExpr->getTableDesc();
if (iterDesc == NULL)
{
CCMPASSERT( iterDesc != NULL );
return FALSE;
}
// do a lookup in the hash dictionary we're currently populating
ValueIdList * colList = tableColumns.getFirstValue( iterDesc );
if ( colList != NULL )
{
colList->insert( iterId ); // the joinValueIdPair which refs this tableDesc
}
else
{
colList = new (HISTHEAP) ValueIdList;
colList->insert( iterId );
tableColumns.insert( iterDesc, colList );
}
}
// -----------------------------------------------------------------------
// We have lists of references to tableDesc's; now, we iterate through them
// to find the one with the most table columns.
//
// If there isn't a table with two column references, we quit.
//
// If there is a tie between two tables for most column refs, we
// pick the one with higher multi-col UEC.
// Table iterator to traverse the tables whose columns have been reduced
NAHashDictionaryIterator<TableDesc,ValueIdList> tableIter( tableColumns );
// This would contain the running descriptor of the table that we are
// iterating from the list of tables with predicates.
TableDesc * iterDesc = NULL;
// This would contain the columns on which we have local predicates for
// the table described in iterDesc. If iterDesc contains T1, then iterList
// would contain (a,b,c). If iterDesc is T2, iterList would be (a,b,c,d)
ValueIdList * iterList = NULL;
// This would contain the ValueIdSet of the largest set of columns which
// have multi-col stats available. The columns are from amongst the
// ones which have the most predicates on them. Example: we have predicates
// on columns (a,b,c) of table T1 and (a,b,c,d) of table T2. At the end of the
// loop, mostSupersetRefs would be 4, and if there are multi-col stats
// available for T2 (a,b) and T2 (a,b,c), then largestSubsetAmongLargestSubsets
// would contain (a,b,c), and mostRefdTable would contain T2.
// Table with multi-column stats on most columns. It would be T2 for us.
TableDesc * mostRefdTable = NULL;
// This would contain the list of columns which have the most predicates
// on them. At the end of the loop it should contain T2 (a,b,c,d)
ValueIdList * superList = NULL;
// This would contain the count of columns of a table which has most local
// predicates. For the above example it would be 4, at the end of the loop
CollIndex mostSupersetRefs = 0;
// largest set of columns which have multi-col UECs for any table.
ValueIdSet largestSubsetAmongLargestSubsets;
// contains the maximum number of columns which have multi-col UECs available
CollIndex mostRefs = 0;
CollIndex count = 1;
// This will contain the multi-col UEC for largestSubsetAmongLargestSubsets.
// It will be used to pick the final multi-col UEC: in case there is a tie
// between the number of columns of two tables, we pick the one with the
// higher multi-col UEC
CostScalar multiColumnUec = csMinusOne;
for ( tableIter.getNext( iterDesc, iterList );
iterDesc != NULL && iterList != NULL;
tableIter.getNext( iterDesc, iterList ) )
{
// in this loop we go through all the columns of this table that were
// reduced, collecting partial multi-col UEC lists. These partial multi-col
// UEC lists could be overlapping or disjoint. Preference is given to
// overlapping multi-col UECs over disjoint multi-col UECs. Partial
// multi-col UECs are combined as follows: MC UEC needed for (a, b, c, d)
// For overlapping: MC-UEC available - (a, b, c) (c, d).
// MC (a, b, c, d) = MC (a, b, c) * MC (c, d) / MC (c)
// For disjoint: MC-UEC available (a, b) (c, d)
// MC (a, b, c, d) = MC(a, b) * MC(c, d)
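// Worked example with hypothetical UECs, needing MC (a, b, c, d):
// Overlapping case: MC(a,b,c) = 600, MC(c,d) = 100, MC(c) = 20
// => MC(a,b,c,d) = 600 * 100 / 20 = 3000
// Disjoint case: MC(a,b) = 50, MC(c,d) = 100
// => MC(a,b,c,d) = 50 * 100 = 5000
// Either estimate is later capped at the base rowcount of the table.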
if ( iterList->entries() > mostSupersetRefs )
{
mostSupersetRefs = iterList->entries();
superList = iterList;
}
// Less than two columns of this table have been reduced.
// Hence cannot use multi-col UEC
if (iterList->entries() < 2) continue;
// Contains the set of columns with the highest UEC from amongst all tables.
// It is the cumulative set of all columns which have multi-col UEC
// available. For example, if we have predicates on columns (a, b, c, d, e)
// and multi-col UEC available for (a, b) and (c, d), then this would
// contain (a, b, c, d).
ValueIdSet cumulativeColSetWithMCUEC;
// contains the multi-col UEC for cumulativeColSetWithMCUEC. In case of
// partial multi-col UECs, it is a function of all partial multi-col UECs for
// columns in cumulativeColSetWithMCUEC, as described above.
// In case there are two tables with the same number of columns competing
// for the multi-col UEC, this variable would contain the higher MC-UEC.
// Correspondingly, cumulativeColSetWithMCUEC would contain the column
// set with the higher multi-col UEC. For example, if T1 (a, b, c, d) has
// multi-col UEC = 1000 and T2 (a, b, c, d) has multi-col UEC = 1200,
// cumulativeColSetWithMCUEC would contain T2 (a, b, c, d) and
// maxMultiColUec = 1200
CostScalar maxMultiColUec = csOne;
// colsWithReductions contain the columns remaining to be checked for
// multi-col UEC
ValueIdSet colsWithReductions(*iterList);
// See if a multi-column UEC exists for all the columns of the table which
// have local predicates on them. That is the best case.
CostScalar mcUec = lookup(colsWithReductions);
if (mcUec.isGreaterThanZero() )
{
maxMultiColUec = mcUec;
cumulativeColSetWithMCUEC.addSet(colsWithReductions);
}
else
{
// we had more than one column with reduction for this table
// but there is no multi-col UEC.
// If this iterList forms a unique index, we shall fake its multi-col UEC
// with the rowcount, and continue. Else we shall display missing stats
// warning for this.
CostScalar baseRowCount = iterDesc->getNATable()->getOriginalRowCount();
if (colsWithReductions.doColumnsConstituteUniqueIndex(iterDesc))
{
// get base rowcount of the table from original colStats
// and set the multi-col UEC equal to this rowcount.
insertPair(colsWithReductions, baseRowCount);
maxMultiColUec = baseRowCount;
cumulativeColSetWithMCUEC.addSet(colsWithReductions);
}
else
{
// we still have a possibility of finding multi-col UEC for this table
// columns
// Combine MC UEC of subset of columns from columns with reduction to get
// MC UEC of larger set
// statsCreated will return TRUE only if the MC stats for larger
// set of columns were created using overlapping subset of columns
ValueIdSet colsWithPreds = colsWithReductions;
NABoolean statsCreated = createMCStatsForColumnSet(colsWithReductions,
cumulativeColSetWithMCUEC,
maxMultiColUec,
baseRowCount
);
// Do not display the warning if we were able to create MC stats for the
// full set of columns from subsets of overlapping column sets, or
// if the subset of columns requiring MC stats is orthogonal
NABoolean displayWarning = isMCStatsUseful(colsWithPreds, iterDesc);
if (displayWarning)
{
// log selectivity from single column histograms in ustat log
CostScalar sel = newRowCount /oldRowCount;
displayMissingStatsWarning(iterDesc, (ValueIdSet)*iterList, largeTableNeedsStats, displayWarning, scHists, sel, FALSE, REL_SCAN);
}
} // end else (colsWithReductions.doColumnsConstituteUniqueIndex(iterDesc))
} // end else (mcUec.isGreaterThanZero() )
// no more multi-col UECs for this table's columns.
// Compare with the earlier computed MC UEC for tables
// to see if this should be picked up. The table which has
// MC-UEC available for the most columns is chosen. In case of a
// tie between two tables, we pick the one with the higher MC-UEC.
if ( (cumulativeColSetWithMCUEC.entries() > largestSubsetAmongLargestSubsets.entries() ) ||
( (cumulativeColSetWithMCUEC.entries() == largestSubsetAmongLargestSubsets.entries() ) &&
(maxMultiColUec > multiColumnUec) ) )
{
// This table has multi-col UEC on more columns than
// any of the tables we have iterated over till now.
// largestSubsetAmongLargestSubsets now contains the largest
// column set with multi-col UEC
largestSubsetAmongLargestSubsets = cumulativeColSetWithMCUEC;
// multi-col UEC for this set
multiColumnUec = maxMultiColUec;
// Table with most columns with multi-col UEC
mostRefdTable = iterDesc;
mostRefs = cumulativeColSetWithMCUEC.entries();
}
} // for (tableIter .... )
// no table with reduction on more than one column
if (mostSupersetRefs <= 1)
return FALSE;
// There are tables with more than one column, but none have multi-col UEC.
if ( mostRefs <= 1 )
return FALSE;
// After having gotten the largest multi-col UEC for this table, get the
// product of predReductions from single column histograms for those columns
CostScalar uecReduction = csOne;
if (largestSubsetAmongLargestSubsets.entries() >= 2)
{
// accumulate single column reductions of all columns with multi-col UEC
for ( ValueId i = largestSubsetAmongLargestSubsets.init();
largestSubsetAmongLargestSubsets.next( i );
largestSubsetAmongLargestSubsets.advance( i) )
{
uecReduction *= (*(predReductions.getFirstValue( &i )));
}
}
//Initial row count of the table before any predicates were applied.
//We need to use this because multiColumnUec is for that table only, so we
//need to stay consistent.
const ColStatDescList & statsList = mostRefdTable->getTableColStats();
const CostScalar & initialRowCount = statsList[0]->getColStats()->getRowcount();
// The following set of conditions is used to check for correlation
// and subsequently to decide what reduction adjustment to pick
if( ( oldRowCount * uecReduction ) < ( initialRowCount / multiColumnUec )
AND ( initialRowCount / multiColumnUec ) > csOne
AND (uecReduction.getValue() > 0.0 ) )
{
// correlation between the columns exists, and therefore we will be able
// to benefit from the multi-col UEC
reductionAdjustment = ( csOne / uecReduction ) * ( csOne / multiColumnUec );
}
else if ( (oldRowCount * uecReduction).isLessThanOne()
AND ( oldRowCount * uecReduction ) < ( initialRowCount / multiColumnUec )
AND newRowCount.isGreaterThanZero() /* != csZero */)
{
// New rowcount obtained from single-column histograms is less than one.
// Set the reductionAdjustment so that the final rowcount is set to one
reductionAdjustment = csOne / newRowCount;
}
else
{
// Columns are not correlated, so no reduction adjustment is needed
reductionAdjustment = csOne;
}
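// Worked example (hypothetical values): initialRowCount = 1,000,000 and
// multiColumnUec = 10,000, i.e. about 100 rows per group. Two predicates
// give uecReduction = 0.001 * 0.01 = 0.00001, so
// oldRowCount * uecReduction = 10 for oldRowCount = 1,000,000, which is
// below the 100 rows per group. The columns are treated as correlated and
// reductionAdjustment = (1 / 0.00001) * (1 / 10000) = 10, scaling the
// estimate back up toward one group's worth of rows.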
// contains either the largest number of columns for which the
// multi-col uec exists or the number of columns which were reduced,
// whichever is smaller.
count = MINOF( largestSubsetAmongLargestSubsets.entries(),
numPredicates );
return TRUE;
}
NABoolean
MultiColumnUecList::createMCStatsForColumnSet(ValueIdSet colsWithReductions,
ValueIdSet & cumulativeColSetWithMCUEC,
CostScalar & maxMultiColUec,
CostScalar baseRowCount
)
{
// find the largest subset of the columns with predicates for which a
// multi-col UEC exists. In case of a tie, the one with the larger
// correlation is returned
cumulativeColSetWithMCUEC = largestSubset(colsWithReductions);
// If largestSubset.entries() == 1, then that means there are no
// multi-col UECs available for this table, so there is nothing
// that we can do. If any multi-col UEC was returned, then we have
// a chance of availability of partial multi-col UECs, that we can
// use
if (cumulativeColSetWithMCUEC.entries() <= 1)
return FALSE;
// combine this MC UEC with any partial overlapping multi-col UECs
maxMultiColUec = lookup(cumulativeColSetWithMCUEC);
// get the remaining columns for which multi-col UEC could not be found
colsWithReductions.subtractSet(cumulativeColSetWithMCUEC);
// In the following two methods we try to create multi-column UEC entries
// for a larger column set using multi-column UECs for smaller sets of
// columns. For this we need to traverse the MC list a number of times.
// In the following method, we create a new MC list.
// This list contains MC-UECs only for those column sets which include
// all remaining columns
MultiColumnUecList * mcListForRemainingCols = createMCListForRemainingCols(
colsWithReductions,
cumulativeColSetWithMCUEC);
// We keep two flags to indicate whether the MC stats for a larger set of
// columns were created using overlapping subsets of columns or disjoint
// sets of columns. If the stats were created using overlapping column sets,
// we would have captured the correlation pretty well and we need not give a
// warning. But if the stats were created using disjoint column sets, then we
// should give a warning
NABoolean statsCreatedWithOverlap = FALSE;
NABoolean statsCreatedWithDisjoint = FALSE;
// Use the MC list created above to collect all relevant partial
// overlapping multi-col UECs first to get the correlation for
// larger set of columns
statsCreatedWithOverlap = mcListForRemainingCols->createMCUECWithOverlappingColSets(
colsWithReductions,
cumulativeColSetWithMCUEC,
maxMultiColUec,
baseRowCount);
// If any stats were created from partial multi-col UEC, save them in the
// multi-col UEC list for future use.
if (statsCreatedWithOverlap)
{
// After all the calculations, the MC UEC should not exceed the
// original rowcount of the table
maxMultiColUec = MINOF(maxMultiColUec, baseRowCount);
this->insertPair(cumulativeColSetWithMCUEC, maxMultiColUec);
// if MC UEC has reached the limit of rowcount, or we have been
// able to generate stats for the complete set, return TRUE
// else continue to build MC stats from smaller subset of columns
if ( (maxMultiColUec == baseRowCount) ||
(colsWithReductions.entries() <= 1) )
return TRUE;
}
// For the remaining columns, use relevant partial disjoint multi-col UECs.
// If the computed multi-col UEC has already reached the limit of rowcount,
// then there is no need to proceed further, as we will not get any benefit
if ( ( maxMultiColUec < baseRowCount) AND
(colsWithReductions.entries() > 1) )
statsCreatedWithDisjoint = mcListForRemainingCols->createMCUECWithDisjointColSets(
colsWithReductions,
cumulativeColSetWithMCUEC,
maxMultiColUec,
baseRowCount);
// After all the calculations, the MC UEC should not exceed the
// original rowcount of the table
maxMultiColUec = MINOF(maxMultiColUec, baseRowCount);
// If any stats were created from partial multi-col UEC, save them in the
// multi-col UEC list for future use.
if (statsCreatedWithDisjoint)
this->insertPair(cumulativeColSetWithMCUEC, maxMultiColUec);
return (statsCreatedWithOverlap || statsCreatedWithDisjoint);
} // createMCStatsForColumnSet
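// Illustrative trace (hypothetical values): colsWithReductions = {a,b,c,d},
// largestSubset() returns {a,b} with MC UEC 50, and the remaining columns
// {c,d} overlap an existing entry such as {b,c,d}; the overlap step then
// combines the two per the rule documented above (divide by the UEC of the
// shared columns), and the result is capped at the base rowcount before
// being cached in this list via insertPair().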
// -----------------------------------------------------------------------
// MultiColumnUecList::getUecForMCJoin
//
// used by multi-column join code (fn CSDL::useMultiUecIfMultipleJoins(), subr of
// fn CSDL::estimateCardinality() )
//
// first, we list the struct used internally and the subroutine
// -----------------------
// struct MCJoinPairStruct
//
// this struct is used in the ::getUecForMCJoin() routine to keep track of
// the correspondence between :
//
// (1) a pair of ValueIdSets (representing the columns from two tables);
//
// (2) the larger of these two sets' multi-column uecs (needed by the
// calling function) ;
//
// (3) the join predicates that are not covered by the two ValueIdSets
// (i.e., the preds that we need to address later in order to correctly estimate
// the entire multi-column join's rowcount)
// -----------------------
struct MCJoinPairStruct
{
ValueIdSet tableOneCols_ ;
ValueIdSet tableTwoCols_ ;
CostScalar prodInitUec_ ;
CostScalar multiColUec_ ;
CostScalar leftMCUec_;
CostScalar baseRowCount_ ;
NAList<ValueIdSet> * remainingJoinPairs_;
// stupid Collections classes require this fn! argh!
NABoolean operator == (const MCJoinPairStruct & rhs)
{ return tableOneCols_ == rhs.tableOneCols_ && tableTwoCols_ == rhs.tableTwoCols_ ; }
public:
MCJoinPairStruct():remainingJoinPairs_(new (CmpCommon::statementHeap())
NAList<ValueIdSet>(CmpCommon::statementHeap()) ) {}
~MCJoinPairStruct(){ delete remainingJoinPairs_;}
} ;
// -----------------------
// global function findMatchingColumns()
//
// subroutine of MultiColumnUecList::getUecForMCJoin()
//
// Given a ValueIdSet (t1Cols) representing the columns from one table,
// figure out the corresponding columns from the second table
// (t2Cols==retval) so that a subset of the join predicates (joinPairs)
// are covered by the two sets of table columns. Any joinPairs not
// covered are placed into (remainingPairs).
//
// This is an ugly little subroutine, not placed in the main code in order
// to (attempt to) keep things a bit clearer.
// -----------------------
ValueIdSet
MultiColumnUecList::findMatchingColumns (
const ValueIdSet & t1Cols, /* in */
const LIST(ValueIdList) & joinPairs, /* in */
LIST(ValueIdList) & remainingPairs, /* out */
CostScalar & maxInitUecProduct,/* out */
CostScalar & minInitUecProduct,/* out */
NABoolean & insuffMCInfo /* out */
) const
{
ValueIdSet t2Cols; // #retval
for ( CollIndex i = 0 ; i < joinPairs.entries(); i++ )
{
NABoolean inserted = FALSE ;
if ( joinPairs[i].entries() > 2 )
{
// we cannot handle a join between many (>2) columns -- do not
// use this join predicate
remainingPairs.insert( joinPairs[i] );
continue ;
}
const ValueId & firstId = joinPairs[i][0];
const ValueId & secondId = joinPairs[i][1];
for ( ValueId id = t1Cols.init();
t1Cols.next( id );
t1Cols.advance( id ) )
{
if ( id == firstId || id == secondId )
{
// keep a running total of the product of the columns'
// initial UECs. If single-column stats do not exist
// for a column, then any multi-column UEC involving it should be
// ignored.
// For example: if there are no single-column statistics for column I1,
// then the MC UEC for any set containing I1 (e.g., (I1, I2)) cannot
// be considered for the join
CostScalar uec1;
CostScalar uec2;
uec1 = this->lookup( firstId );
if (uec1.isLessThanZero() )
{
insuffMCInfo = TRUE;
// even though we are returning t2Cols, it shall not be used
return t2Cols;
}
else
{
uec2 = this->lookup( secondId );
if (uec2.isLessThanZero() )
{
insuffMCInfo = TRUE;
// even though we are returning t2Cols, it shall not be used
return t2Cols;
}
}
if ( id == firstId )
t2Cols.insert( secondId );
else
t2Cols.insert( firstId );
inserted = TRUE ;
maxInitUecProduct *= MAXOF( uec1, uec2 );
minInitUecProduct *= MINOF( uec1, uec2 );
break;
}
}
if ( NOT inserted )
remainingPairs.insert( joinPairs[i] );
}
CCMPASSERT( t2Cols.entries() == t1Cols.entries() ); // sanity check
return t2Cols ;
}
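// Illustrative example (hypothetical ValueIds): with t1Cols = {T1.a, T1.c}
// and joinPairs = { [T1.a,T2.b], [T1.c,T2.d], [T1.e,T2.f] }, the routine
// returns t2Cols = {T2.b, T2.d}, places [T1.e,T2.f] into remainingPairs,
// and accumulates maxInitUecProduct =
// MAXOF(uec(T1.a),uec(T2.b)) * MAXOF(uec(T1.c),uec(T2.d)) (with the MINOF
// analogue in minInitUecProduct).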
// A helper function to break a tie between two table descs. The side with
// the smaller ipFactor, the smaller mcUec, or the larger ValueId of the
// first column wins, in that order of precedence.
Int32 tieBreaker(
CostScalar ipFactor1, CostScalar mcUec1, TableDesc* tabDesc1,
CostScalar ipFactor2, CostScalar mcUec2, TableDesc* tabDesc2
)
{
if (ipFactor1 < ipFactor2 )
return 1;
if (ipFactor1 > ipFactor2 )
return -1;
if (mcUec1 < mcUec2 )
return 1;
if (mcUec1 > mcUec2 )
return -1;
if ( tabDesc1 == tabDesc2 )
return 0;
if ((CollIndex) ((tabDesc1->getColumnList())[0]) >
(CollIndex) ((tabDesc2->getColumnList())[0])
)
return 1;
else
return -1;
}
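// Example (hypothetical inputs): tieBreaker(0.2, 100, t1, 0.5, 80, t2)
// returns 1 because the first side has the smaller ipFactor (stronger
// correlation); the mcUec and ValueId comparisons are consulted only when
// the earlier criteria tie.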
// -----------------------------------------------------------------------
// MultiColumnUecList::getUecForMCJoin
//
// Given a list of ValueIdLists representing the two (or more) join
// predicates between (hopefully) two tables, return TRUE/FALSE if we
// have the necessary multi-column uec information about some of the
// columns involved in this join.
//
// Return this multi-column uec number (the larger of the two tables'
// multi-column uec for the columns in question), and the join columns
// which weren't used to generate this number.
//
// i.e., if we do
// "sel * from T1,T2 where T1.a=T2.b AND T1.c=T2.d",
// we need MC-info on (T1.a,T1.c) and (T2.b,T2.d) -- if this exists,
// then return TRUE and set maxMultiColUec to be the larger of the two
// corresponding multi-column uec values for (T1.a,T1.c) & (T2.b, T2.d)
//
// as a more general case, if we do
// "sel * from T1,T2 where T1.c1=T2.c1 AND T1.c2=T2.c2 AND
// ... T1.cn=T2.cn", then we want to return the largest ValueIdSets
// (t1.1,...,t1.m) (t2.1,...t2.m) such that there is exactly one t2.1
// for every t1.1 -- any remaining ValueIdSets in joinValueIdPairs are
// returned to the calling function, which must then apply
// single-column selectivity for them.
//
// retval :
//
// TRUE: If we can return MC-info for at least two columns from each
// table, return TRUE and set the 2nd and 3rd parameters to be the
// column subsets from the two tables in question.
//
// FALSE: If we don't have MC-info for at least two columns from each
// table, then simply return FALSE.
//
// NB: this routine recurses if necessary. Overall, we're using a greedy
// algorithm to try to cover the join column ValueId's with multi-column
// uec information. This greedy algorithm might not find the best overall
// answer, but a more general solution will have nasty time-complexity.
// -----------------------------------------------------------------------
NABoolean
MultiColumnUecList::getUecForMCJoin (
LIST(ValueIdList) & joinValueIdPairs, /* in/out */
const NABoolean largeTableNeedsStats, /* in */
const Join * expr,
CostScalar & prodInitUec, /* out */
CostScalar & multiColUec, /* out */
CostScalar & baseRCForMCUEC, /* out */
CostScalar & leftMCUec, /* out */
NABoolean & checkForLowBound, /* out */
NABoolean & joinOnUnique, /* out */
const ColStatDescList & colStats,
CostScalar redFromSC
)
{
if ( joinValueIdPairs.entries() < 2 )
return FALSE;
// first, we need to divide the ValueId's in joinValueIdPairs into two lists
// of table columns, corresponding to the two tables that are being
// joined together
// We use a list, as we want to know exactly which column is being joined
// to which column of the other table, so we can compare their base UECs
// correctly while looking for containment.
// For other purposes where we can do with a set, we will keep a copy
// of the set too
ValueIdList tableOneList, tableTwoList;
ValueIdSet tableOneSet, tableTwoSet;
CollIndex i, j;
// -----------------------------------------------------------------------
// First, we need to identify whether we have two tables T1, T2
// such that we're joining two columns (minimum) between these
// two tables
//
// We use a hash-dictionary to implement an association list of
// <TableDesc*, ValueIdList> pairs -- each ValueIdList indicates
// the list of columns from this table participating in join
// -----------------------------------------------------------------------
NAHashDictionary<TableDesc,ValueIdList> tableColumns (
&TableDescHashFn, 7, TRUE, HISTHEAP );
// hash fn, initsize, uniq, heap)
// joinValueIdPairs indicate the list of joining pairs
// example, T1.a = T2.a and T1.b = T2.b. The pairs would be
// T1.a, T2.a and T1.b, T2.b
for ( i = 0; i < joinValueIdPairs.entries(); i++ )
{
// get the list of joining columns
const ValueIdList & joinList = joinValueIdPairs[i];
// for each joining column, get its base table and
// see if this column belongs to the current table we are looking at
for ( j = 0; j < joinList.entries(); j++ )
{
const ValueId & iterId = joinList[j];
// initially assume ITM_BASETABLE
const BaseColumn * iterExpr = iterId.castToBaseColumn();
if ( iterExpr == NULL )
return FALSE; // unexpected condition
TableDesc * iterDesc = iterExpr->getTableDesc();
if (iterDesc == NULL)
{
CCMPASSERT( iterDesc != NULL );
return FALSE;
}
// do a lookup in the hash dictionary we're currently populating
// to see if an entry for that table already exists
ValueIdList * colList = tableColumns.getFirstValue( iterDesc );
if ( colList != NULL )
{
// if the entry for that table already exists, add the column to the columnList
// of this table. This would be true while traversing the second and beyond
// joining columns of this table
colList->insert( iterId );
}
else
{
// if this is the first entry for this table in hash dictionary
// create an entry with the tableDesc and the columnId
colList = new (HISTHEAP) ValueIdList( );
colList->insert( iterId );
tableColumns.insert( iterDesc, colList );
}
}
}
// -----------------------------------------------------------------------
// We have lists of references to tableDesc's; now, we iterate through them
// and make sure that we have two tableDesc's that are referenced a minimum
// of twice each.
//
// If we don't find two tables with two references each, we quit.
//
// If there are >2 such tables, we take the two tables which were
// referenced the most. If there are ties, we pick the one with the most
// correlation; if the correlation is identical, we pick the one with the
// smaller MC UECs
// -----------------------------------------------------------------------
NAHashDictionaryIterator<TableDesc,ValueIdList> tableIter( tableColumns );
TableDesc * tableDesc = NULL;
ValueIdList * colList = NULL;
ValueIdSet colSet;
CollIndex mostRefs = 0,
secondMost = 0;
ValueIdSet colSetForMostRefs, colSetForSecondMost;
CostScalar mcUecCurrent = COSTSCALAR_MAX;
CostScalar mcUecMostRefd = COSTSCALAR_MAX;
CostScalar mcUecSecondMost = COSTSCALAR_MAX;
CostScalar ipFactorCurrent = csOne;
CostScalar ipFactorMostRefd = csOne;
CostScalar ipFactorSecondMost = csOne;
// Keep a flag to use later to differentiate the conditions that MC stats
// cannot be used because they are not needed or because the stats are missing
NABoolean mcStatsPresent = TRUE;
// the table with the most refs is tableOne; the table with the 2nd most
// refs is tableTwo (not that this matters ... :-)
TableDesc * tableOneDesc = NULL;
TableDesc * tableTwoDesc = NULL;
for ( tableIter.getNext( tableDesc, colList );
tableDesc != NULL && colList != NULL;
tableIter.getNext( tableDesc, colList ) )
{
colSet = (ValueIdSet) *colList;
if (colSet.entries() < 2 )
{
// This table joined on only one column, hence cannot be
// used for MC adjustment
continue;
}
mcStatsPresent = TRUE; // assume mc stats is available for this colSet
mcUecCurrent = lookup(colSet);
// if the columns are unique use basetable rowcount as the MC UEC
NABoolean colsUnique = FALSE;
if (mcUecCurrent == csMinusOne)
{
// MC stats do not exist for the joining columns
// check to see if it is unique and if we can use rowcount as the MCUEC
colsUnique = colSet.doColumnsConstituteUniqueIndex(tableDesc);
if (colsUnique)
{
// if colSet == unique index, the MC stats for that would have been added
// much earlier at the time of fetching stats. This is for cases when
// colSet is a super set of unique index.
// get base rowcount of the table from original colStats
// and set the multi-col UEC equal to this rowcount
// save it for later use too
mcUecCurrent = tableDesc->getNATable()->getOriginalRowCount();
this->insertPair(colSet, mcUecCurrent);
}
else
{
// Since the stats are missing, use just the colSet entries to make a decision
// We will give the warning regarding missing stats later, based on other
// heuristics
mcStatsPresent = FALSE;
}
}
CostScalar SCproductUec = csOne;
// compute the correlation factor for this set. The smaller the ipFactor,
// the larger the correlation. ipFactor = 1 means there
// is no correlation, or the columns are unique.
// If there is a conflict we pick the one with the larger correlation
if (!colsUnique && mcStatsPresent)
{
for (ValueId keyCol = colSet.init();
colSet.next(keyCol);
colSet.advance(keyCol) )
SCproductUec *= lookup(keyCol).value();
ipFactorCurrent = mcUecCurrent/SCproductUec;
}
else
ipFactorCurrent = csOne;
// For choosing the most referenced tables, pick the one joining on the
// larger number of columns. If more than one table joins on the same number
// of columns, pick the one with the larger correlation (smaller ipFactor).
// If more than one table has the same ipFactor and the same number of
// joining columns, pick the one with the smaller MC UEC.
// If MC stats are missing, just use the number of columns as the guiding
// factor in deciding which tables to choose for MC adjustment.
// We do this here because later we use heuristics to decide whether an
// MC stats warning is needed. We also use heuristics later to construct
// MC stats from smaller subsets of columns. Hence missing stats at this
// point do not mean that we are done.
if (CmpCommon::getDefault(COMP_BOOL_41) == DF_ON)
mcStatsPresent = FALSE;
if ( ( colSet.entries() > mostRefs )
||
(
(mcStatsPresent && colSet.entries() == mostRefs) &&
(tableOneDesc == NULL ||
tieBreaker(ipFactorCurrent, mcUecCurrent,tableDesc,
ipFactorMostRefd, mcUecMostRefd, tableOneDesc) == 1)
)
)
{
secondMost = mostRefs;
colSetForSecondMost = colSetForMostRefs;
mcUecSecondMost = mcUecMostRefd;
tableTwoDesc = tableOneDesc;
ipFactorSecondMost = ipFactorMostRefd;
mostRefs = colSet.entries();
colSetForMostRefs = colSet;
mcUecMostRefd = mcUecCurrent;
tableOneDesc = tableDesc;
ipFactorMostRefd = ipFactorCurrent;
}
else if ( ( colSet.entries() > secondMost )
||
((mcStatsPresent && colSet.entries() == secondMost) &&
( tableTwoDesc == NULL ||
tieBreaker(ipFactorCurrent, mcUecCurrent,tableDesc,
ipFactorSecondMost, mcUecSecondMost, tableTwoDesc) ==
1)
)
)
{
secondMost = colSet.entries();
colSetForSecondMost = colSet;
mcUecSecondMost = mcUecCurrent;
tableTwoDesc = tableDesc;
ipFactorSecondMost = ipFactorCurrent;
}
}
// If at most one table joins on two or more columns, then there is
// nothing we can do; return.
if ( secondMost == 0 )
return FALSE;
// did that go alright? check to make sure:
// It would not do any major harm
// even if the tableDescs are NULL, because later they are used
// for finding the columns which belong to the tableDescs, and if they
// are NULL, that just means no columns can be used for MC estimates,
// which is not serious enough to justify a crash.
if ((tableOneDesc == NULL) || (tableTwoDesc == NULL ))
{
CCMPASSERT( tableOneDesc != NULL && tableTwoDesc != NULL );
return FALSE;
}
// now iterate again and grab the tables which had the mostRefs & secondMost
// most and second most referenced tables have their table descriptors saved
// in tableOneDesc and tableTwoDesc
tableIter.reset();
ValueIdSet joinsUsed;
for ( tableIter.getNext( tableDesc, colList );
tableDesc != NULL && colList != NULL;
tableIter.getNext( tableDesc, colList ) )
{
if ( ( tableDesc == tableOneDesc) || ( tableDesc == tableTwoDesc) )
{
joinsUsed.insert((ValueIdSet) * colList);
}
}
if ( joinsUsed.entries() < 2 ) // something weird, but legal (e.g., T1.a=T1.b=T2.a=T2.b)
return FALSE; // cannot handle this case, punt!
// -----------------------------------------------------------------------
// Now we need to create a "clean" list of joinValueIdPairs (to simplify
// future processing), removing any column references that do not refer
// to our two TableDesc*'s. Initially, we just grab the subset of the
// joinValueIdPairs that referenced the two tables.
//
// Any unusable entries in joinValueIdPairs are stored in a new list
// (joinPairsNotUsed), which we'll add to our "unused join preds" retval
// before returning from the function.
// -----------------------------------------------------------------------
// joinPairsUsed + joinPairsNotUsed = joinValueIdPairs
NAList<ValueIdList> joinPairsUsed(CmpCommon::statementHeap()),
joinPairsNotUsed(CmpCommon::statementHeap());
for ( i = 0; i < joinValueIdPairs.entries(); i++ )
{
ValueIdSet joiningCols = joinsUsed;
ValueIdSet joinColPair = joinValueIdPairs[i];
if ( joiningCols.intersectSet( joinColPair ).entries() >= 2 )
joinPairsUsed.insert( joinValueIdPairs[i] );
else
joinPairsNotUsed.insert( joinValueIdPairs[i] );
}
// -----------------------------------------------------------------------
// Now remove table references from joinPairsUsed that don't match
// either tableOneDesc or tableTwoDesc. Note that this step is
// potentially buggy; if we're doing something like
//
// {T1.a=T2.a=T3.a,T1.b=T2.b=T3.b, T1.c=T2.c}
//
// then calculating the resulting rowcount by just looking at
//
// {T1.a=T2.a,T1.b=T2.b,T1.c=T2.c}
//
// is inexact -- if we were being really careful (which would require a
// *ton* more checking, which isn't warranted due to the little we'd
// gain), then we wouldn't throw away the reference to T3. But that's
// what we're doing right now. So there.
//
// We are trying to handle the case where we do the following
// "transformation":
//
// {T1.a=T2.a=T3.a, T1.b=T4.b, T1.c=T2.c=T5.c} ==>
// {T1.a=T2.a, T1.c=T2.c}
//
// This happens in TPC-D, for example.
//
// NB: If we have a join of the form :
//
// {T1.a=T1.b=T2.a}
//
// We punt on that one -- put it into joinPairsNotUsed.
// -----------------------------------------------------------------------
ValueIdList empty;
// Go thru all pairs of joining columns
for ( i = 0; i < joinPairsUsed.entries(); i++ )
{
NABoolean tableOneRefd = FALSE,
tableTwoRefd = FALSE;
const ValueIdList & joinList = joinPairsUsed[i];
ValueIdSet joinSet( joinList ); // convenience : list->set
// For each pair get all columns. Example if T1.a = T2.a = T3.a
// then the joinlist will contain, T1.a, T2.a, T3.a
// From this list keep only those columns which have been chosen
// to do MC adjustment. These columns are referred by tableOneDesc
// and tableTwoDesc
for ( j = 0; j < joinList.entries(); j++ )
{
const ValueId & iterId = joinList[j];
// initially assume ITM_BASETABLE
const BaseColumn * iterExpr = iterId.castToBaseColumn();
if ( iterExpr == NULL )
return FALSE ; // unexpected condition
const TableDesc * iterDesc = iterExpr->getTableDesc();
if (iterDesc == NULL)
{
CCMPASSERT( iterDesc != NULL );
return FALSE;
}
if ( iterDesc == tableOneDesc )
tableOneRefd = TRUE;
else if ( iterDesc == tableTwoDesc )
tableTwoRefd = TRUE;
else
joinSet.remove( iterId );
}
// make sure that each table was ref'd exactly once (i.e., both
// bools are TRUE and there are only two entries left)
if ( NOT ( tableOneRefd AND tableTwoRefd AND joinSet.entries() == 2) )
{
joinPairsNotUsed.insert( joinPairsUsed[i] );
joinPairsUsed[i] = empty;
}
else
{
ValueIdList tmpPair = joinSet;
joinPairsUsed[i] = tmpPair;
}
}
// -----------------------------------------------------------------------
// Now filter out all of the empty lists inside joinPairsUsed -- to make
// the previous loop's logic simpler, we put off dealing with the "no
// auto increment" until now.
// -----------------------------------------------------------------------
for ( i = 0; i < joinPairsUsed.entries(); /* no auto increment */ )
if ( joinPairsUsed[i].entries() == 0 )
joinPairsUsed.removeAt( i );
else
i++ ;
// -----------------------------------------------------------------------
// Now we iterate through the "cleaned-up" list of joinPairs, and collect
// the ValueId's of joining columns from each table for which MC will be used
//
// First, we check again : if joinPairsUsed doesn't have at least two
// entries, then we quit.
// -----------------------------------------------------------------------
if ( joinPairsUsed.entries() < 2 )
return FALSE;
for ( i = 0; i < joinPairsUsed.entries(); i++ )
{
const ValueIdList & joinList = joinPairsUsed[i];
// if any pair has != 2 entries, then something is very wrong in
// the previous logic, and we want to know about it --
// though not from the customer. If the entries are not equal
// to 2, then we simply shall not be able to use the MC UEC
if (joinList.entries() != 2 )
{
CCMPASSERT( joinList.entries() == 2 );
return FALSE;
}
for ( j = 0; j < joinList.entries(); j++ )
{
const ValueId & iterId = joinList[j];
// initially assume ITM_BASETABLE
const BaseColumn * iterExpr = iterId.castToBaseColumn();
if ( iterExpr == NULL )
return FALSE ; // unexpected condition
const TableDesc * iterDesc = iterExpr->getTableDesc();
if (iterDesc == NULL)
{
CCMPASSERT( iterDesc != NULL );
return FALSE;
}
if ( iterDesc == tableOneDesc )
{
tableOneSet.insert( iterId );
tableOneList.insert(iterId);
}
else if ( iterDesc == tableTwoDesc )
{
tableTwoSet.insert( iterId );
tableTwoList.insert(iterId);
}
else
{
// it's a table column not associated with either of
// the two tables we're joining -- however, we already
// tried to remove all of these references! abort!
// If there is a table column not associated with any of the two tables
// then in the worst case we shall not be able to use MC stats, but
// it should not be a work stoppage
CCMPASSERT( FALSE );
return FALSE ;
}
} // j-loop
} // i-loop
// for whatever reason, we don't have the situation where there are
// at least two candidate columns from each of two tables; abort
if ( tableOneSet.entries() < 2 OR tableTwoSet.entries() < 2)
return FALSE;
// See if the join is being done on non-key columns. If it is, then
// we don't want to use the MC uec. This is because, through MC stats, we
// are not able to correctly estimate the containment of one set of
// non-unique columns in the other. For example:
// T1(a,b,c,d,e ...) key is a,b,c rowcount = 1Mil
// T2(a,b,c,d,e ) key is a,b,c rowcount = 1Mil
// Uec(1.a,1.b) = 1000
// Uec(2.a,2.b) = 10000
// Using MC we force the join estimate to be 1M x 1M / 10000.
// Here we assumed that the 1000 values of 1.a,1.b are a subset
// of the 10000 values of 2.a,2.b. This is usually not correct
// and results in overestimation of the cardinality.
// This would most likely have been correct if 2.a,2.b were the
// key of T2, as that would make it a foreign-key join, so we allow
// that exception.
// CQD HIST_SKIP_MC_FOR_NONKEY_JOIN_COLUMNS is used to control
// this behaviour. 'OFF' for this CQD means we use MC stats even
// for joins on non-key columns. 08/24/04
NABoolean tableOneSetUnique = tableOneSet.doColumnsConstituteUniqueIndex(tableOneDesc);
NABoolean tableTwoSetUnique = tableTwoSet.doColumnsConstituteUniqueIndex(tableTwoDesc);
if (CURRSTMT_OPTDEFAULTS->histSkipMCUecForNonKeyCols() )
{
// Check if the primary key or a unique index of either table is fully covered
// by the columns on which this table is being joined. If none of the tables
// are being joined on primary key, return
if ( !( tableOneSetUnique || tableTwoSetUnique ) )
return FALSE;
}
if (tableOneSet.entries() != tableTwoSet.entries() )
{
// The number of columns being joined from two tables
// should be equal, if not, we cannot use multi-column UEC
CCMPASSERT( tableOneSet.entries() == tableTwoSet.entries() );
return FALSE;
}
NABoolean displayWarning = TRUE;
if ( (CmpCommon::getDefault(HIST_MC_STATS_NEEDED) == DF_ON)
AND largeTableNeedsStats )
{
NAString tableOneName(CmpCommon::statementHeap()),
tableTwoName(CmpCommon::statementHeap()); // table names
NAString tableOneCols(CmpCommon::statementHeap()),
tableTwoCols(CmpCommon::statementHeap()); // column names
if ((this->lookup( tableOneSet )).isLessThanZero() )
{
// There is no multi-col uec entry for tableOneSet. If tableOneSet is unique,
// fake the multi-col uec by setting its UEC equal to the rowcount
if (tableOneSetUnique)
{
// get base rowcount of the table from original colStats
// and set the multi-col UEC equal to this rowcount
CostScalar baseRowCount = tableOneDesc->getNATable()->getOriginalRowCount();
this->insertPair(tableOneSet, baseRowCount);
}
else
{
if (( tableTwoSetUnique ) || (!CURRSTMT_OPTDEFAULTS->histSkipMCUecForNonKeyCols()) )
{
// But it is being joined to unique columns, so display the missing stats
// warning if the warning could be useful. The warning level must be
// greater than 3 for the MC warnings from Join to be displayed
if ( (CURRSTMT_OPTDEFAULTS->histMissingStatsWarningLevel() > 3) &&
(isMCStatsUseful(tableOneSet, tableOneDesc)))
displayWarning = TRUE;
else
displayWarning = FALSE;
displayMissingStatsWarning(tableOneDesc,
tableOneSet,
largeTableNeedsStats,
displayWarning,
colStats,
redFromSC,
FALSE,
REL_JOIN);
}
}
}
if ((this->lookup( tableTwoSet )).isLessThanZero() )
{
// There is no multi-col uec entry for tableTwoSet. If it is unique,
// we fake it with the rowcount; else display the missing stats warning.
if (tableTwoSetUnique )
{
// get base rowcount of the table from original colStats
// and set the multi-col UEC equal to this rowcount
CostScalar baseRowCount = tableTwoDesc->getNATable()->getOriginalRowCount();
this->insertPair(tableTwoSet, baseRowCount);
}
else
{
if (( tableOneSetUnique ) || (!CURRSTMT_OPTDEFAULTS->histSkipMCUecForNonKeyCols()) )
{
// But it is being joined to unique columns, so display the missing stats
// warning if the warning could be useful. The warning level must be
// greater than 3 for the MC warnings from Join to be displayed
if ( (CURRSTMT_OPTDEFAULTS->histMissingStatsWarningLevel() > 3) &&
(isMCStatsUseful(tableTwoSet, tableTwoDesc)))
displayWarning = TRUE;
else
displayWarning = FALSE;
displayMissingStatsWarning(tableTwoDesc,
tableTwoSet,
largeTableNeedsStats,
displayWarning,
colStats,
redFromSC,
FALSE,
REL_JOIN);
}
}
}
} // no WARNING MULTI_COLUMN_STATS_NEEDED's currently in diags
// MC adjustment for semi-joins is done a little differently than for inner
// joins. For semi-joins we use the MIN of the MC UECs instead of the MAX.
// We will not do any shortcut MC on a unique column set
NABoolean isNotASemiJoin = ((expr && !expr->isSemiJoin() && !expr->isAntiSemiJoin()) ? TRUE : FALSE);
// if any of the sides was unique, and the MCUEC of the second is contained in the
// unique side, use row count of non-unique side as the join cardinality.
// Else determine the largest MCUEC and use that to compute adjustment
// for MC UEC
CostScalar rowcountOfNonUniqSide = csMinusOne;
if ((CmpCommon::getDefault(COMP_BOOL_149) == DF_ON) &&
isNotASemiJoin &&
(tableOneSetUnique || tableTwoSetUnique) )
{
rowcountOfNonUniqSide = getRowcountOfNonUniqueColSet(expr, tableOneList, tableTwoList,
tableOneSetUnique, tableTwoSetUnique);
if (rowcountOfNonUniqSide > 0)
{
if (CmpCommon::getDefault(COMP_BOOL_147) == DF_OFF)
checkForLowBound = FALSE;
else
checkForLowBound = TRUE;
// This is a misnomer - multiColUec will be storing row count of non-unique side
// instead of MC UEC.
multiColUec = rowcountOfNonUniqSide;
joinOnUnique = TRUE;
// joinValueIdPairs as an output parameter contains the join pairs that were
// not used for MCUEC. Since we used all columns for uniqueness, we will clear
// this parameter.
joinValueIdPairs.clear();
return TRUE;
}
}
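// Illustrative example (hypothetical tables): T1 is unique on its joining
// columns (a,b) and T2, the non-unique side, has 50,000 rows. Each T2 row
// matches at most one T1 row, so the shortcut above returns TRUE with
// multiColUec = 50000 (really the non-unique side's rowcount),
// joinOnUnique = TRUE, and joinValueIdPairs cleared, since all join
// columns were consumed.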
// The joining set is not unique. Compute adjustment for MC UECs
// Now the join column ValueIds are partitioned into two sets. Let's
// see whether we can find MC-info corresponding to (the largest
// possible) subsets of these sets such that one subset is being joined
// to the other (i.e., both sets together add up to a subset of the
// entries in joinPairsUsed)
// Notice, this is a fairly complicated operation! It seems that the
// best way to do this is iterate through what MC-info we have -- note
// that if we don't have actual multi-column information, then we
// shouldn't have gotten this far.
ValueIdSet * keyEntry = NULL;
CostScalar * uecEntry = NULL;
MultiColumnUecListIterator iter( *this );
LIST(ValueIdSet) tableOnePossibles(STMTHEAP); // we'll iter over these
SET(ValueIdSet) tableTwoPossibles(STMTHEAP); // we'll do lookups over these
for ( iter.getNext( keyEntry, uecEntry );
keyEntry != NULL && uecEntry != NULL;
iter.getNext( keyEntry, uecEntry ) )
{
// want to consider only those entries containing MC-info
if ( keyEntry->entries() < 2 ) continue;
// we could check to make sure they're referencing the same table
// desc, as above, but the check below should be faster and
// more-than-sufficient for our needs
if ( tableOneSet.contains( *keyEntry ) )
tableOnePossibles.insert( *keyEntry );
else if ( tableTwoSet.contains( *keyEntry ) )
tableTwoPossibles.insert( *keyEntry );
}
if ( tableOnePossibles.entries() == 0 OR
tableTwoPossibles.entries() == 0 )
{
// we are returning because of insufficient multi-column
// information for this table
checkForLowBound = TRUE;
return FALSE;
}
// OK -- now we have two lists of possibly useful MC-info that we might
// be able to use; now we need to check to see whether any pair (x,y)
// between these two lists actually satisfies the requirement that the
// columns in x are being joined to the columns in y
// Plan : iterate over the entries in tableOnePossibles
//
// For each entry in tableOnePossibles, we determine, from the joinPairsUsed,
// what multi-column uec information we need to have in tableTwoPossibles.
//
// If that entry does not exist in tableTwoPossibles, we remove the current entry
// in tableOnePossibles and continue.
//
// When we're done, whatever remains in tableOnePossibles is usable.
// Then it's a matter of determining which entry in tableOnePossibles is
// best; after we do this, we need to remove some entries from
// joinValueIdPairs (the ones that correspond to the best entry in
// tableOnePossibles) and set the value of maxMultiColUec to be the
// larger of the two tables' multi-column uec.
NAList<MCJoinPairStruct *> corrList(CmpCommon::statementHeap()); // for "correspondence list"
NAList<ValueIdList> remainingPairs(CmpCommon::statementHeap()); // tmp var
CostScalar maxInitUecProduct = csOne;
CostScalar minInitUecProduct = csOne;
for ( i = 0; i < tableOnePossibles.entries(); i++ )
{
maxInitUecProduct = csOne;
minInitUecProduct = csOne;
// need to clear the entries for this call to findMatchingColumns()
remainingPairs.clear();
ValueIdSet needInTableTwo =
this->findMatchingColumns (
tableOnePossibles[i],
joinPairsUsed,
remainingPairs,
maxInitUecProduct,
minInitUecProduct,
checkForLowBound
);
// if findMatchingColumns() set checkForLowBound (via its insuffMCInfo
// parameter) because no single-column statistics existed for some column
// that was part of the multi-column set, then return FALSE
if (checkForLowBound )
return FALSE;
// if the table 2 MC-info doesn't exist, continue
if ( NOT tableTwoPossibles.contains( needInTableTwo ) )
continue;
MCJoinPairStruct *entry = new (CmpCommon::statementHeap()) MCJoinPairStruct();
// Start setting the entries for MC computation.
// Set the columns from the two tables for which MC UEC exist
entry->tableOneCols_ = tableOnePossibles[i];
entry->tableTwoCols_ = needInTableTwo;
if (isNotASemiJoin)
entry->prodInitUec_ = maxInitUecProduct;
else
entry->prodInitUec_ = minInitUecProduct;
CostScalar mcUEC1 = this->lookup (entry->tableOneCols_);
CostScalar mcUEC2 = this->lookup (entry->tableTwoCols_);
if (expr && !isNotASemiJoin)
{
// if it is a semi_join, save the left MCUEC
ColStatDescList leftColStatsList = ((Join *)expr)->child(0).outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP))->colStats();
if (leftColStatsList.contains(entry->tableOneCols_))
entry->leftMCUec_ = mcUEC1;
else
entry->leftMCUec_ = mcUEC2;
}
// now set the max MC UEC.
if(CmpCommon::getDefault(COMP_BOOL_141) == DF_OFF)
{
// If COMP_BOOL_141 is OFF, set the maxMCUEC from the
// base table, using the UECs as they exist in the base table;
// if it is a semi-join, use the min UEC instead of the max.
// This is the original behaviour from denali days
if ( mcUEC1 > mcUEC2)
{
if (isNotASemiJoin)
{
entry->multiColUec_ = mcUEC1;
entry->baseRowCount_ = tableOneDesc->getNATable()->getOriginalRowCount();
}
else
{
entry->multiColUec_ = mcUEC2;
entry->baseRowCount_ = tableTwoDesc->getNATable()->getOriginalRowCount();
}
}
else
{
if (isNotASemiJoin)
{
entry->multiColUec_ = mcUEC2;
entry->baseRowCount_ = tableTwoDesc->getNATable()->getOriginalRowCount();
}
else
{
entry->multiColUec_ = mcUEC1;
entry->baseRowCount_ = tableOneDesc->getNATable()->getOriginalRowCount();
}
}
}
else
{
// If COMP_BOOL_141 is ON, set the maxMCUEC after applying the reduction
// from local predicates, and cap the product of max init UECs at the base
// row count of the table from which the MC UEC is being picked up.
// This applies to all joins except semi joins; for semi joins take the
// minimum UEC instead.
CostScalar baseRowCountForTableOne = tableOneDesc->getNATable()->getOriginalRowCount();
CostScalar baseRowCountForTableTwo = tableTwoDesc->getNATable()->getOriginalRowCount();
// get the left and the right children of Join and compute highest reduction from local predicates
// for the set of joining columns
if (expr && expr->isAnyJoin())
{
CostScalar highestUecRedByLocalPreds1 = ((Join *)expr)->highestReductionForCols(entry->tableOneCols_);
CostScalar highestUecRedByLocalPreds2 = ((Join *)expr)->highestReductionForCols(entry->tableTwoCols_);
// The reduction from local predicates should never exceed 1
if (highestUecRedByLocalPreds1 > csOne)
{
CCMPASSERT ("Reduction from local predicates is greater than 1");
highestUecRedByLocalPreds1 = csOne;
}
if (highestUecRedByLocalPreds2 > csOne)
{
CCMPASSERT ("Reduction from local predicates is greater than 1");
highestUecRedByLocalPreds2 = csOne;
}
mcUEC1 = (mcUEC1 * highestUecRedByLocalPreds1).minCsOne();
mcUEC2 = (mcUEC2 * highestUecRedByLocalPreds2).minCsOne();
}
// Now use the larger of the reduced MC UEC to do cardinality adjustments
if ( ( mcUEC1 > mcUEC2) || ( (mcUEC1 == mcUEC2) &&
(baseRowCountForTableOne > baseRowCountForTableTwo) ) )
{
if (isNotASemiJoin)
{
entry->multiColUec_ = mcUEC1;
entry->baseRowCount_ = baseRowCountForTableOne;
}
else
{
entry->multiColUec_ = mcUEC2;
entry->baseRowCount_ = baseRowCountForTableTwo;
}
}
else
{
if (isNotASemiJoin)
{
entry->multiColUec_ = mcUEC2;
entry->baseRowCount_ = baseRowCountForTableTwo;
}
else
{
entry->multiColUec_ = mcUEC1;
entry->baseRowCount_ = baseRowCountForTableOne;
}
}
}// end COMP_BOOL_141 = ON
// -----------------------------------------------------------
// want to write: entry.remainingJoinPairs_ = remainingPairs ;
entry->remainingJoinPairs_->clear() ; // probably unnecessary
for ( j = 0; j < remainingPairs.entries(); j++ )
entry->remainingJoinPairs_->insert( remainingPairs[j] );
// -----------------------------------------------------------
if ( entry->multiColUec_.isGreaterThanZero() ) // sanity check -- uec<=0 is bad ...
corrList.insert( entry );
if ( entry->remainingJoinPairs_->entries() == 0 )
break; // these columns cover all joins columns -- best we can do -- stop now
}
// if the corresponding multi-column uec information does not exist in table
// two, then we cannot do anything, abort
if ( corrList.entries() == 0 )
{
// we are returning because of insufficient multi-column
// information for table 2
checkForLowBound = TRUE;
return FALSE;
}
// Now we have a list of possible return information; find out which entry is the best
CollIndex bestIdx = 0; // index of best entry so far
for ( i = 1; i < corrList.entries(); i++ )
{
if ( corrList[ i ]->remainingJoinPairs_->entries() <
corrList[bestIdx]->remainingJoinPairs_->entries() )
bestIdx = i;
}
// -----------------------------------------------------------
// Until now, we have not changed the values of the function parameters --
// now we set these values and prepare to return
//
// Don't forget to add back those joinPairs that didn't reference either
// tableOne or tableTwo!
// -----------------------------------------------------------
// -----------------------------------------------------------
// if we had a better C++ interface, we'd write the following lines as:
// joinValueIdPairs = *(corrList[bestIdx].remainingJoinPairs_) + joinPairsNotUsed ;
// ----------------------------------------------------------
LIST(ValueIdList) oldJoinValueIdPairs(joinValueIdPairs,
CmpCommon::statementHeap());
joinValueIdPairs.clear();
SET(ValueIdSet) joinValueIdPairSet(CmpCommon::statementHeap());
ValueIdSet vidset;
for ( i = 0; i < corrList[bestIdx]->remainingJoinPairs_->entries(); i++ )
{
vidset = corrList[bestIdx]->remainingJoinPairs_->at(i);
joinValueIdPairSet.insert( vidset );
}
for ( i = 0; i < joinPairsNotUsed.entries(); i++ )
{
vidset = joinPairsNotUsed[i];
joinValueIdPairSet.insert( vidset );
}
for (i=0;
i<joinValueIdPairSet.entries();
i++)
{
vidset = joinValueIdPairSet.at(i);
ValueIdList vlist = vidset;
joinValueIdPairs.insert(vlist);
}
prodInitUec = corrList[bestIdx]->prodInitUec_; // #retvar
multiColUec = corrList[bestIdx]->multiColUec_; // #retvar
baseRCForMCUEC = corrList[bestIdx]->baseRowCount_; // #retvar
leftMCUec = corrList[bestIdx]->leftMCUec_;
// ------------------------------------------------------------------
// if there are two or more join predicates that are uncovered by our
// work above, then it might be possible to find another MC-uec value
// for these remaining values -- recurse and see ...
// only make the recursive call if there is a change in
// joinValueIdPairs
// ------------------------------------------------------------------
if ( ( NOT ( oldJoinValueIdPairs == joinValueIdPairs ) ) AND
( joinValueIdPairs.entries() >= 2 ) )
{
// $$$ for the short-term, we'll recurse; later on, if this
// $$$ appears to be a performance hit, we can avoid redoing
// $$$ a lot of the same work
CostScalar moreMultiColUec = csMinusOne,
moreInitUec = csMinusOne,
moreLeftMCUec = csOne;
NABoolean insuffMCInfo = FALSE;
CostScalar newBaseRowCount = csOne;
if ( this->getUecForMCJoin( joinValueIdPairs,
largeTableNeedsStats,
expr,
moreInitUec,
moreMultiColUec,
newBaseRowCount,
moreLeftMCUec,
insuffMCInfo,
joinOnUnique,
colStats,
csMinusOne) )
{
CCMPASSERT( moreMultiColUec.isGreaterThanZero() ); // sanity check
prodInitUec = (prodInitUec * moreInitUec).minCsOne(); // multiply the maxInit-uec's together
multiColUec = (moreMultiColUec * multiColUec).minCsOne(); // multiply the MC-uec's together
leftMCUec = (moreLeftMCUec * leftMCUec).minCsOne();
baseRCForMCUEC = newBaseRowCount.minCsOne();
}
}
// $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
// $$$ One definite problem with the above code (among many, perhaps) :
// $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
// $$$ if we do something like
// $$$
// $$$ {T1.a=T2.a=T3.a, T1.b=T2.b, T1.c=T2.c}
// $$$
// $$$ reducing it to
// $$$
// $$$ {T1.a=T2.a, T1.b=T2.b, T1.c=T2.c}
// $$$
// $$$ but only have multicolumn uec information for
// $$$
// $$$ <T1.b,T1.c>, <T2.b,T2.c>
// $$$
// $$$ then the first join, as returned from this function
// $$$ to the calling fn, is <T1.a,T2.a> (we've removed the
// $$$ reference to table T3!). This won't cause real
// $$$ problems, but it's a loss of information.
// $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
if (CmpCommon::getDefault(COMP_BOOL_147) == DF_OFF)
checkForLowBound = FALSE;
else
checkForLowBound = TRUE;
CCMPASSERT( prodInitUec.isGreaterThanZero() ); // sanity check
return TRUE;
}
// -----------------------------------------------------------------------
// MultiColumnUecList::containsMCinfo
//
// It is very possible for the MultiColumnUecList to contain only
// single-column entries. Most of the usage of this class is predicated
// on it actually containing multi-column information in this list -- if
// there isn't, then very often there's no point in continuing the current
// operation.
//
// This routine answers the question of whether any TRUE multi-column
// information exists in the MultiColumnUecList.
// -----------------------------------------------------------------------
NABoolean
MultiColumnUecList::containsMCinfo () const
{
ValueIdSet * keyEntry = NULL;
CostScalar * uecEntry = NULL;
MultiColumnUecListIterator iter( *this );
for ( iter.getNext( keyEntry, uecEntry );
keyEntry != NULL && uecEntry != NULL;
iter.getNext( keyEntry, uecEntry ) )
{
if ( keyEntry->entries() > 1 )
return TRUE;
}
return FALSE;
}
// The following method creates a multi-col UEC for a larger set of columns
// using partially overlapping multi-col UECs.
// For example, if MC-UEC available - (a, b, c) (c, d).
// Then MC (a, b, c, d) = MC (a, b, c) * MC (c, d) / MC (c)
// returns TRUE or FALSE depending on whether it was able to create
// any statistics from partial multi-col UECs
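// Worked example with illustrative numbers (assumed, not from any real
// table): if MC (a, b, c) = 1000, MC (c, d) = 200 and the single-column
// UEC of c is 50, then MC (a, b, c, d) = 1000 * 200 / 50 = 4000,
// subject to the cap of the table's row count applied below.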
NABoolean MultiColumnUecList::createMCUECWithOverlappingColSets(
ValueIdSet & remainingCols, /* in / out */
ValueIdSet & cumulativeColSetWithMCUEC, /* in / out */
CostScalar & mcUec, /* in / out */
CostScalar oldRowCount)
{
NABoolean statsCreated = FALSE;
// There can be several levels of overlap: for example, one column
// overlapping, two columns overlapping, and so on. Trying all
// overlapping combinations can be very expensive and might not be
// very cost effective. Hence we look only for the first level of
// overlap; in other words, we consider only those partial multi-col
// UECs in which exactly one column overlaps
CollIndex noOfColsInterested = remainingCols.entries() + 1;
while (noOfColsInterested > 1)
{
// traverse through all the entries of the multiColUecList for this table
ValueIdSet * keyEntry = NULL ;
CostScalar * uecEntry = NULL ;
ValueIdSet overlappingCol;
MultiColumnUecListIterator iter (*this) ;
Int32 i ;
double smallestIPFactor = 1;
ValueIdSet * setWithMinIPFactor = NULL;
for ( iter.getNext (keyEntry, uecEntry), i = 1 ;
keyEntry != NULL && uecEntry != NULL ;
iter.getNext (keyEntry, uecEntry), i++ )
{
ValueIdSet cols;
cols = *keyEntry;
if (keyEntry->entries() != noOfColsInterested)
continue;
// there is no point in looking further if the number of entries is different.
// It could be an interesting set if it contains all remaining columns and
// exactly one column from cumulativeColSetWithMCUEC
cols.subtractSet(remainingCols);
if (cols.entries() != 1)
{
// contains zero or more than one overlapping column,
// so we're not interested
continue;
}
if (!(cumulativeColSetWithMCUEC.contains(cols)) )
{
// does not contain overlapping columns, so continue to look for more
continue;
}
CostScalar keyUec = lookup(*keyEntry);
double ipFactor;
double SCproductUec = 1.0;
for (ValueId keyCol = keyEntry->init();
keyEntry->next(keyCol);
keyEntry->advance(keyCol))
SCproductUec *= lookup(keyCol).value();
ipFactor = keyUec.value()/SCproductUec;
if (ipFactor <= smallestIPFactor)
{
// found the overlapping column.
// There could be more entries with overlapping columns, so pick the
// one with the minimum correlation factor. Example: MC UECs available -
// (x, y), (x, z), (y, z). To create (x, y, z), we could use
// (x, y) + (x, z) OR (x, y) + (y, z) OR (x, z) + (y, z).
// From the three, we pick the one for which
// (multi-column UEC / product of single-column UECs) is smallest
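// Worked example with assumed numbers: if UEC(x) = 100, UEC(y) = 80 and
// MC (x, y) = 2000, then ipFactor = 2000 / (100 * 80) = 0.25. The
// smaller the factor, the stronger the correlation captured by that MC
// entry, so it is the better candidate.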
setWithMinIPFactor = keyEntry;
overlappingCol = cols;
smallestIPFactor = ipFactor;
}
}// for all entries()
// see if we can get any smaller partial multi-col UECs.
// For example, if we were looking for a UEC with three columns and could
// not find any (setWithMinIPFactor == NULL), then look for two
// columns and continue. If we were able to find a UEC for three columns,
// then check for any other remaining columns
if (setWithMinIPFactor)
{
statsCreated = TRUE;
mcUec = mcUec * lookup(*setWithMinIPFactor) / lookup(overlappingCol);
cumulativeColSetWithMCUEC.addSet(*setWithMinIPFactor);
remainingCols.subtractSet(*setWithMinIPFactor);
noOfColsInterested = remainingCols.entries() + 1;
// while processing, if the cumulative UEC becomes greater than
// the rowcount, cap it and break out of the loop
if (mcUec >= oldRowCount)
{
mcUec = oldRowCount;
break;
}
}
else
noOfColsInterested--;
} // while (noOfColsInterested > 1)
return statsCreated;
} // MultiColumnUecList::createMCUECWithOverlappingColSets
// The following method creates a multi-col UEC for a larger set of columns
// using disjoint partial multi-col UECs.
// For example, if MC-UEC available - (a, b) (c, d).
// Then MC (a, b, c, d) = MC (a, b) * MC (c, d)
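// Worked example with illustrative numbers (assumed): if MC (a, b) = 300
// and MC (c, d) = 40, then MC (a, b, c, d) = 300 * 40 = 12000, capped
// at the table's row count by the check below.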
NABoolean MultiColumnUecList::createMCUECWithDisjointColSets(
ValueIdSet & remainingCols, /* in / out */
ValueIdSet & cumulativeColSetWithMCUEC, /* in / out */
CostScalar & mcUec /* in / out*/,
CostScalar oldRowCount)
{
NABoolean statsCreated = FALSE;
while (remainingCols.entries() > 1)
{
// collect all disjoint multi-col UEC column sets. In case of a conflict, get the
// one with largest correlation
const ValueIdSet & largestSubset = this->largestSubset(remainingCols);
if (largestSubset.entries() < 2)
{
// return no more multi-col UECs for this table
return statsCreated;
}
mcUec *= lookup(largestSubset);
cumulativeColSetWithMCUEC.addSet(largestSubset);
remainingCols.subtractSet(largestSubset);
statsCreated = TRUE;
// if UEC has exceeded rowcount, no need to go further
if (mcUec >= oldRowCount)
{
mcUec = oldRowCount;
break;
}
} // end while (remainingCols.entries() > 1)
return statsCreated;
} // MultiColumnUecList::createMCUECWithDisjointColSets
// In the following method, we create a new MC list.
// This list contains MC-UECs for only those column sets which include
// all columns of remainingCols and at most one column from
// cumulativeColSetWithMCUEC
MultiColumnUecList * MultiColumnUecList::createMCListForRemainingCols(
ValueIdSet remainingCols,
ValueIdSet cumulativeColSetWithMCUEC)
{
MultiColumnUecList * groupUec = new (STMTHEAP) MultiColumnUecList ();
// There can be several levels of overlap: for example, one column
// overlapping, two columns overlapping, and so on. Trying all
// overlapping combinations can be very expensive and might not be
// very cost effective. Hence we look only for the first level of
// overlap; in other words, we consider only those partial multi-col
// UECs with at most one overlapping column
CollIndex noOfColsInterested = remainingCols.entries();
ValueIdSet * keyEntry = NULL ;
CostScalar * uecEntry = NULL ;
ValueIdSet overlappingCol;
MultiColumnUecListIterator iter (*this) ;
// traverse through the multi-column uec list to get the relevant mc entries
Int32 i;
for ( iter.getNext (keyEntry, uecEntry), i = 1 ;
keyEntry != NULL && uecEntry != NULL ;
iter.getNext (keyEntry, uecEntry), i++ )
{
// We identify the interesting entries in the MC list as follows:
// Example: remainingCols = (a, b, c) and cumulativeColSetWithMCUEC = (d, e, f).
// The only entries of interest are (a, b), (a, b, c), (a, b, d),
// (a, b, c, d), etc. Entries of no interest include:
// (a, b, c, g)    - extra column 'g'
// (a, b, d, e)    - more than one overlapping column
// (a, b, c, d, e) - more than one overlapping column; this case can be
//                   identified just by counting the entries in the MC set,
//                   which should not exceed the number of remaining
//                   columns + 1.
// Hence, if the number of entries in the MC set is greater than
// (no. of remaining columns + 1), we are not interested in it. Such
// oversized entries look like:
// (a, b, c, d, e) - two overlapping columns
// (a, b, c, d, g) - one overlapping column plus an extra column
// (a, b, g, h, i) - no overlapping columns but extra columns
if (keyEntry->entries() > (noOfColsInterested + 1) )
continue;
// MC entry has
// less than columns interested - could be (a, b) or (a, d) or (a, g) or (g, h)..
// equal to columns interested - could be (a, b, c), (a, b, d), (a, g, d),
// (a, g, e) ..
// equal to columns interested + 1 - could be (a, b, c, d), (a, b, d, e),
// (a, b, c, e), (a, b, d, g), ....
ValueIdSet cols;
cols = *keyEntry;
// it could be an interesting set if the only columns it has beyond
// remainingCols amount to at most one column from cumulativeColSetWithMCUEC
cols.subtractSet(remainingCols);
if (cols.entries() > 1)
{
// contains more than one column outside remainingCols, i.e. multiple
// overlapping columns or extra columns.
// Entries like (g, h), (a, g, d), (a, g, e), (a, b, d, e), (a, b, d, g)
// will be eliminated, since all have more than one overlapping column
// or some extra columns
continue;
}
// Entries remaining are (a, b), (a, d), (a, g), (a, b, c), (a, b, d),
// (a, b, c, d), (a, b, c, e)
if ((cols.entries() == 1) AND !(cumulativeColSetWithMCUEC.contains(cols)) )
{
// does not contain overlapping columns, so not interested. (a, g)
continue;
}
// entries that will be added are like
// (a, b), (a, b, c) - cols.entries() == 0, no columns overlapping
// (a, d), (a, b, d), (a, b, c, d), (a, b, c, e) - cols.entries() == 1
// and cumulativeColSetWithMCUEC.contains(cols)
//
groupUec->insertPair(*keyEntry, *uecEntry);
}
return groupUec;
} // MultiColumnUecList::createMCListForRemainingCols
//
// useful debugging routines
//
void
MultiColumnUecList::print (FILE *ofd,
const char * prefix,
const char * suffix) const
{
#ifndef NDEBUG
ValueIdSet * keyEntry = NULL ;
CostScalar * uecEntry = NULL ;
MultiColumnUecListIterator iter (*this) ;
fprintf (ofd, "================================================\n") ;
fprintf (ofd, "%sEntries: %i\n", prefix, this->entries() ) ;
Int32 i ;
for ( iter.getNext (keyEntry, uecEntry), i = 1 ;
keyEntry != NULL && uecEntry != NULL ;
iter.getNext (keyEntry, uecEntry), i++ )
{
fprintf (ofd, "%s (** entry %i**) ",prefix,i) ;
keyEntry->display() ;
fprintf (ofd, " ===> uec: %g\n", uecEntry->value()) ;
}
fprintf (ofd, "================================================\n") ;
#endif
}
void
MultiColumnUecList::display() const
{
print();
}
void
MultiColumnUecList::displayMissingStatsWarning(TableDesc * mostRefdTable,
ValueIdSet predCols,
NABoolean largeTableNeedsStats,
NABoolean displayWarning,
const ColStatDescList &colStats,
CostScalar redFromSC,
NABoolean quickStats,
OperatorTypeEnum op) const
{
HSLogMan *LM = HSLogMan::Instance();
LM->LogTimeDiff("START MISSING HISTOGRAM WARNING MESSAGES", TRUE);
// Do not display warning if user does not want to use multi-column
if ((predCols.entries() > 1) &&
(CmpCommon::getDefault(HIST_MC_STATS_NEEDED) == DF_OFF) )
return;
// Do not display warning if the query is an internal query from
// the executor
if (Get_SqlParser_Flags(INTERNAL_QUERY_FROM_EXEUTIL))
return;
const NATable * mostRefdNATable = mostRefdTable->getNATable();
// create a set of column positions for predCols. These are all base table
// columns, so we should be able to cast each one of them to a BaseColumn.
// This is necessary to handle cases where the same columns appear in both
// the query and its sub-query. If we went by the columns' ValueIds, then,
// since the ValueIds of a column in the two places would differ, the
// warning would be inserted twice. A good example of this is TPCD query 2,
// where columns such as ps_partkey and ps_suppkey appear in both the main
// query and its sub-query. By keying on column position we avoid that.
// This should not be very expensive, since column numbers are cached with
// the column expression
CollIndexSet setOfColsWithMissingStats(NULL,STMTHEAP);
// define ValueId outside of for loop, to avoid c++ compiler error.
ValueId col;
for (col = predCols.init();
predCols.next(col);
predCols.advance(col) )
{
BaseColumn * baseCol = col.castToBaseColumn();
if (baseCol == NULL)
return;
CollIndex colNumber = baseCol->getColNumber();
NAColumn *column = baseCol->getTableDesc()->getNATable()->
getNAColumnArray()[colNumber];
// Don't give a warning if it is not marked for histogram
// or it is not a user column. Exception is salt column, for
// which we suppress warning, but create empty histogram if
// automation is enabled.
if ( !column->isReferencedForHistogram() ||
(!column->isUserColumn() && !column->isSaltColumn()))
return;
setOfColsWithMissingStats.insert(colNumber );
}
// Now check to see if the warning for missing MC stats has already been
// inserted in the diags area. If it has, then return
if (mostRefdNATable->doesMissingStatsWarningExist(setOfColsWithMissingStats) )
return;
HSGlobalsClass::schemaVersion = mostRefdNATable->getObjectSchemaVersion();
Int32 warningNumber;
Lng32 ustatAuto = CmpCommon::getDefaultLong(USTAT_AUTOMATION_INTERVAL);
Lng32 ustatLevel = CmpCommon::getDefaultLong(USTAT_AUTO_MISSING_STATS_LEVEL);
// Determine if the missing histogram should be inserted by the compiler into the HISTOGRAMS table
// for update statistics automation. The following must ALL be true:
// 1. NOT an MP table
// 2. Schema version of table >= 2300
// 3. Table is not volatile or automation is ON for volatile tables.
// 4. Auto missing stats CQD level is set high enough for this single or multi-col hist.
if ( ustatAuto > 0 &&
HSGlobalsClass::schemaVersion >= COM_VERS_2300 &&
(!mostRefdNATable->isVolatileTable() ||
CmpCommon::getDefault(USTAT_AUTO_FOR_VOLATILE_TABLES) == DF_ON) &&
((ustatLevel >= 1 && predCols.entries() == 1) ||
(ustatLevel >= 2 && op == REL_SCAN) ||
(ustatLevel >= 3 && op == REL_JOIN) ||
(ustatLevel >= 4 && op == REL_GROUPBY) )
)
{
// Ustat automation is on for this object.
// Insert empty entry to HISTOGRAMS table.
// Determine if this is a synonym and if so, get name of table instead.
Int64 tableUID;
NAString tableName;
NAString histogramSchemaName;
if (mostRefdNATable->getIsSynonymTranslationDone()) {
tableName = mostRefdNATable->getSynonymReferenceName();
tableUID = mostRefdNATable->getSynonymReferenceObjectUid().get_value();
// Get catalog and schema name. Find end of schema name.
QualifiedName qualName(tableName, 3);
histogramSchemaName = qualName.getSchemaNameAsAnsiString();
}
else {
tableName = mostRefdNATable->getExtendedQualName().getText();
tableUID = mostRefdNATable->objectUid().get_value();
histogramSchemaName = mostRefdNATable->getTableName().getCatalogName() + "."
+ mostRefdNATable->getTableName().getSchemaName();
}
NAString histogramTableName = getHistogramsTableLocation(histogramSchemaName,FALSE)
+ "." + HBASE_HIST_NAME;
Lng32 retcode = -1;
HSinsertEmptyHist hist(tableUID, tableName.data(), histogramTableName.data());
Lng32 colCount = 0;
// get col numbers
for ( col = predCols.init();
predCols.next(col);
predCols.advance(col))
{
BaseColumn * baseCol = col.castToBaseColumn();
if (baseCol)
{
Lng32 colNumber = baseCol->getColNumber();
retcode = hist.addColumn(colNumber);
if (retcode == 0)
colCount++;
}
}
if ( retcode == 0 && predCols.entries() == (CollIndex)colCount ) // got every col number?
{
NABoolean switched = FALSE;
CmpContext* prevContext = CmpCommon::context();
// switch to another context to avoid spawning an arkcmp process when compiling
// the user metadata queries on the histograms tables
if (IdentifyMyself::GetMyName() == I_AM_EMBEDDED_SQL_COMPILER)
if (SQL_EXEC_SWITCH_TO_COMPILER_TYPE(CmpContextInfo::CMPCONTEXT_TYPE_USTATS)) //CMPCONTEXT_TYPE_META))
{
//failed to switch/create metadata CmpContext, continue using current compiler context
}
else
{
switched = TRUE;
// send controls to the context we are switching to
sendAllControls(FALSE, FALSE, FALSE, COM_VERS_COMPILER_VERSION, TRUE, prevContext);
}
retcode = hist.insert();
// switch back to previous compiler context if we switched above
if (switched == TRUE)
SQL_EXEC_SWITCH_BACK_COMPILER();
}
if (predCols.entries() == 1)
{
if (quickStats)
warningNumber = SINGLE_COLUMN_SMALL_STATS_AUTO;
else
warningNumber = SINGLE_COLUMN_STATS_NEEDED_AUTO;
}
else
warningNumber = MULTI_COLUMN_STATS_NEEDED_AUTO;
}
// Ustat automation is NOT on.
else
if (predCols.entries() == 1)
{
if (quickStats)
warningNumber = SINGLE_COLUMN_SMALL_STATS;
else
warningNumber = SINGLE_COLUMN_STATS_NEEDED;
}
else
warningNumber = MULTI_COLUMN_STATS_NEEDED;
// Do not display warning if rows < default CQD (which could be true if
// ustat automation is on).
if (CURRSTMT_OPTDEFAULTS->ustatAutomation() &&
(mostRefdTable->getTableColStats()[0]->getColStats()->getRowcount() <
CostPrimitives::getBasicCostFactor(HIST_ROWCOUNT_REQUIRING_STATS)))
return;
// Do not display warning for salt column, although an empty histogram may
// have been added to histograms table above. Use new ValueIdSet nonSaltPredCols
// for remainder of function, which is predCols with any salt column removed.
ValueIdSet nonSaltPredCols(predCols);
ValueIdSet saltCols;
for (col = predCols.init();
predCols.next(col);
predCols.advance(col))
{
if (col.isSaltColumn())
saltCols.addElement(col.toUInt32());
}
nonSaltPredCols.remove(saltCols);
if (nonSaltPredCols.isEmpty())
return;
// Now see if we should issue a missing single-column or a missing multi-column
// stats warning. This depends on the number of columns on which
// statistics are missing.
// check the warning level
// If the warning level is set to 0, or largeTableNeedsStats is FALSE,
// do not display any warning
if ((CURRSTMT_OPTDEFAULTS->histMissingStatsWarningLevel() == 0) ||
(largeTableNeedsStats == FALSE) )
return;
// If warning level is one, display only single column stats warning.
if ((CURRSTMT_OPTDEFAULTS->histMissingStatsWarningLevel() == 1) &&
(nonSaltPredCols.entries() > 1) )
return;
// If this is a multi-column (MC) stat, check to see if it should be displayed as
// a warning based on the operation and the warning level.
if ((nonSaltPredCols.entries() > 1) &&
((CURRSTMT_OPTDEFAULTS->histMissingStatsWarningLevel() < 2 && op == REL_SCAN) ||
(CURRSTMT_OPTDEFAULTS->histMissingStatsWarningLevel() < 3 && op == REL_JOIN) ||
(CURRSTMT_OPTDEFAULTS->histMissingStatsWarningLevel() < 4 && op == REL_GROUPBY)) )
return;
// dump the warning in the diags area
NAString tableName(CmpCommon::statementHeap()); // table name
NAString tableCols(CmpCommon::statementHeap()); // column names
NAString predicates(CmpCommon::statementHeap()); // predicates
ValueIdSet thePreds;
thePreds.clear();
if (mostRefdNATable->getIsSynonymTranslationDone())
tableName = mostRefdNATable->getSynonymReferenceName();
else
tableName = mostRefdNATable->
getExtendedQualName().getQualifiedNameObj().getQualifiedNameAsAnsiString();
// collect all column names into a string
NABoolean first = TRUE;
NAString connectorText(", ");
tableCols += "(";
NAString opName (CmpCommon::statementHeap());
switch (op)
{
case REL_SCAN:
opName = "Scan";
break;
case REL_JOIN:
opName = "Join";
break;
case REL_GROUPBY:
opName = "GroupBy";
break;
}
for (col = nonSaltPredCols.init();
nonSaltPredCols.next(col);
nonSaltPredCols.advance(col))
{
if (first)
first = FALSE;
else
tableCols += connectorText;
BaseColumn * baseCol = col.castToBaseColumn();
if (baseCol == NULL)
return;
NAString colName(baseCol->getTableDesc()->getNATable()->
getNAColumnArray()[baseCol->getColNumber()]->
getColName(), STMTHEAP);
tableCols += ToAnsiIdentifier(colName);
// if ustat logging is ON, log the predicates for which the warning is
// being issued
if (LM->LogNeeded() && colStats.entries() > 0)
{
// also collect the predicates applied on this column
CollIndex i;
// get the histogram for the column
NABoolean found = colStats.getColStatDescIndexForColumn(i, col);
if (found)
{
// histogram for which the warning is being issued is found
LM->Log("histogram for column with missing stats found");
ColStatDescSharedPtr tempStatDesc = colStats[i];
if (tempStatDesc->getAppliedPreds().entries() > 0)
{
LM->Log("column for missing stats has been reduced by a predicate");
thePreds = tempStatDesc->getAppliedPreds();
thePreds.unparse(predicates, DEFAULT_PHASE, EXPLAIN_FORMAT, mostRefdTable);
sprintf(LM->msg, "%s",predicates.data());
LM->Log(LM->msg);
}
else
// This condition can legitimately happen for group bys;
// if it happens for scans or joins then there is a bug in the code
LM->Log("column for missing stats did not have any predicate applied");
}
else
// This would mean a bug in the code as this should never happen
LM->Log("histogram for column with missing stats NOT found");
}
}
tableCols += ")";
*CmpCommon::diags() << DgSqlCode( warningNumber )
<< DgString0( tableCols )
<< DgString1( tableName )
<< DgString2(opName);
if (LM->LogNeeded() && nonSaltPredCols.entries() > 1)
{
LM->Log("MC Warning for table, col set, operator, selectivity: ");
sprintf(LM->msg, "%s, %s, %s, %e", tableName.data(), tableCols.data(), opName.data(), redFromSC.getValue());
LM->Log(LM->msg);
}
LM->LogTimeDiff("END MISSING HISTOGRAM WARNING MESSAGES");
// insert the columns with missing stats in the table Desc, so the
// warning is not displayed again
mostRefdNATable->insertMissingStatsWarning(setOfColsWithMissingStats);
}
// -----------------------------------------------------------------
// isMCStatsUseful is used to determine if there is any possibility
// of optimizer benefiting from multi-column stats. The MC stats
// are said to be not helpful, if any subset of given column set
// is orthogonal. More heuristics can be added later to determine
// usefulness of MC stats
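// Illustrative example (assumed numbers): for columnSet (a, b, c) with
// existing MC stats on (a, b), rowcount = 1,000,000 and
// MC UEC (a, b) = 990,000, the pair (a, b) is already nearly unique, so
// an MC histogram on (a, b, c) would add little; we return FALSE once
// the existing MC UEC exceeds COMP_FLOAT_8 * rowcount.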
// -----------------------------------------------------------------
NABoolean MultiColumnUecList::isMCStatsUseful(ValueIdSet columnSet,
TableDesc * tableDesc) const
{
// This is used for only multi column stats, hence if columnSet
// consists of single column, return FALSE
if (columnSet.entries() == 1)
return FALSE;
// check to see if columnSet is unique, if it is, return FALSE.
if (columnSet.doColumnsConstituteUniqueIndex(tableDesc))
return FALSE;
// get the largest subset of columns for which an MC UEC exists.
// If MC stats existed for the given column set, the returned
// column set would be identical to the column set passed in.
// Since the method is currently called only when stats for
// columnSet are missing, we will get a proper subset of columns
ValueIdSet colSetWithMCStats = largestSubset(columnSet);
// if MC stats do not exist for any columns, the colSetWithMCStats will
// be empty. We return TRUE then to indicate that stats will be useful
if (colSetWithMCStats.entries() == 0)
return TRUE;
// if mcUec is almost equal to the rowcount, that means that columns are almost
// orthogonal. MC stats for larger set may not give enough benefit then.
// This is TRUE even if the largest subset is a single column. However
// if columnSet is equal to colSetWithMCStats then we cannot use this assumption
if (colSetWithMCStats.entries() <= columnSet.entries())
{
// get the MC UEC of the columnSet
CostScalar mcUec = lookup(colSetWithMCStats);
// get rowcount from the base table
CostScalar tableRowCount = tableDesc->tableColStats()[0]->getColStats()->getRowcount();
// if the MC UEC is almost equal to the row count, we may not benefit much from the MC stats
// return FALSE in that case
CostScalar uecCushion((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_8));
if (mcUec >= uecCushion * tableRowCount)
return FALSE;
}
return TRUE;
} // isMCStatsUseful
CostScalar
ColStatDescList::getCardOfBusiestStreamForUnderNJ(
CANodeIdSet * outerNodeSet,
const PartitioningFunction* partFunc,
Lng32 numOfParts,
GroupAttributes * groupAttr,
Lng32 countOfCPUs)
{
// get the partitioning key and number of partitions
ValueIdSet partKey = partFunc->getPartitioningKey();
// get the total rows in the histogram
CostScalar rowCount = (*this)[0]->getColStats()->getRowcount();
// if number of partitions is 1, return rowcount
if ( ( numOfParts == 1) ||
(partFunc->isASkewedDataPartitioningFunction()) ||
( partFunc->isASinglePartitionPartitioningFunction() ) )
{
return (rowCount).minCsOne();
}
// The cardinality is based on the number of CPUs or the number of
// partitions (whichever is fewer) for a few situations:
// 1) if partitioning key is empty
// 2) the round robin partitioning scheme is used.
// 3) the skew buster partitioning scheme is used.
if ( (partKey.isEmpty()) ||
(partFunc->isASkewedDataPartitioningFunction()) ||
(partFunc->isARoundRobinPartitioningFunction()) )
{
Lng32 availableCpus = MINOF( numOfParts , countOfCPUs );
return (rowCount / availableCpus).minCsOne();
}
// In the following loop, get the min UEC from amongst the partitioning
// key to compute the number of streams. In the same loop also compute
// the accumulated frequency of the partitioning key to compute cardinality
// per stream for hash partitions
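// Illustrative example (assumed numbers): for a partitioning key (a, b)
// with UEC(a) = 4 and UEC(b) = 5, uecForPartKey = 4 * 5 = 20, so at most
// MINOF(numOfParts, 20) streams receive data; accFreq accumulates the
// relative frequency of the most frequent value of each key column.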
// min UEC for partitioning key
CostScalar uecForPartKey = csOne;
// accumulated freq of the partitioning key
CostScalar accFreq = csOne;
ValueIdSet myColumns;
NABoolean outerReferencesExist = FALSE;
ValueIdSet usedCols;
ValueIdSet referencedPartKeyColSet;
if (groupAttr)
{
ValueIdSet baseCols;
ValueIdSet myInputs = groupAttr->getCharacteristicInputs();
if (NOT myInputs.isEmpty())
{
outerReferencesExist = TRUE;
myInputs.findAllReferencedBaseCols(myColumns);
}
if (!outerNodeSet)
{
rowCount = rowCount.minCsOne();
return (rowCount/(MINOF((CostScalar)numOfParts, rowCount))).minCsOne();
}
ValueIdSet outerBaseCols = outerNodeSet->getUsedTableCols();
myColumns.intersectSet(outerBaseCols);
GroupAnalysis * grpAnalysis = groupAttr->getGroupAnalysis();
if (grpAnalysis)
{
CANodeIdSet treeSet = grpAnalysis->getAllSubtreeTables();
if (NOT treeSet.isEmpty() )
{
// note that UsedCols do not contain any columns
// from outer references. They are "local"
usedCols = treeSet.getUsedCols();
myColumns.addSet(usedCols);
}
}
}
for (ValueId partKeyElement = partKey.init();
partKey.next(partKeyElement);
partKey.advance(partKeyElement) )
{
// extract all base columns from the partitioning key column before
// looking for statistics
ValueIdSet baseColSet;
// this is because findAllReferencedBaseCols is defined on ValueIdSet
ValueIdSet partKeySet(partKeyElement);
partKeySet.findAllReferencedBaseCols(baseColSet);
// from all the base columns for this partitioning key, get the
// ones that belong to me. In case of joins, the base column set would
// also contain the columns I am joining to
if (NOT myColumns.isEmpty())
baseColSet.intersectSet(myColumns);
// if outer references are present, then it is under a nested join
// want to compute columns from the partitioning key VEGRef
// that are from the outer references
ValueIdSet partKeySourceSide;
if (outerReferencesExist)
{
//want to do this; note usedCols has no outer references
// partKeySourceSide=baseColSet - usedCols
ValueIdSet origSet = baseColSet;
origSet.subtractSet(usedCols);
partKeySourceSide = origSet;
}
CostScalar freq;
// get min of max frequency of any column from the given column set
// If outer references exist compute from amongst the columns coming
// from the outer, else do it from the inner side
// In most cases, there would be only one column in the column
// set for which the frequency is needed. In case of expressions
// there could be more than one column in the column set.
if (outerReferencesExist)
{
freq = getMinOfMaxFreqOfCol(partKeySourceSide).minCsOne();
}
else
{
freq = getMinOfMaxFreqOfCol(baseColSet).minCsOne();
}
// get minimum UEC from the given column set;
// this needs columns from outer references as well
CostScalar colUec;
if (CmpCommon::getDefault(COMP_BOOL_47) == DF_OFF)
{
if (outerReferencesExist)
colUec = getMinUec(partKeySourceSide);
else
colUec = getMinUec(baseColSet);
}
else
{
if (outerReferencesExist)
colUec = getMaxUec(partKeySourceSide);
else
colUec = getMaxUec(baseColSet);
}
if (colUec == csMinusOne)
uecForPartKey = rowCount;
else
uecForPartKey *= colUec;
accFreq *= (freq / rowCount);
} // for all partitioning key columns
// uec cannot be greater than the row count
uecForPartKey = MINOF(uecForPartKey, rowCount);
// compute the number of streams
CostScalar noOfStreams = MINOF((CostScalar)numOfParts, uecForPartKey);
// If partitioning key column is a Random number, and activeStreams_ is less
// than the current value of noOfStreams, then noOfStreams = activeStreams_;
CostScalar activeStreams = partFunc->getActiveStreams();
long randomFix = ActiveSchemaDB()->getDefaults().getAsLong(COMP_INT_26);
if ( (partKey.entries() == 1) AND (randomFix != 0) AND (activeStreams != 0) )
{
// Get first key column.
ValueId myPartKeyCol;
partKey.getFirst(myPartKeyCol);
// is it a random number?
if (myPartKeyCol.getItemExpr()->getOperatorType() == ITM_RANDOMNUM)
{
noOfStreams = MINOF(activeStreams, noOfStreams);
}
}
// based on the partitioning type, compute the cardinality of the stream
// For all but hash type partitions, cardinality = number of rows / # of streams
// such that # of streams = MINOF(# of partitions, UEC of partitioning key)
if (partFunc->isAHashPartitioningFunction() ||
partFunc->isATableHashPartitioningFunction() )
{
CostScalar cardOfFreqValue = (rowCount * accFreq).minCsOne();
CostScalar maxCardPerStream = (((rowCount - cardOfFreqValue)/noOfStreams) + cardOfFreqValue).round();
// cardinality per stream cannot be greater than the total row count
maxCardPerStream = MINOF(maxCardPerStream, rowCount);
// some of NJ plans were over-penalized by incorporate_skew_in_costing.
// provide a cqd HIST_SKEW_COST_ADJUSTMENT to soften effect
// of incorporate_skew_in_costing.
// take weighted average of uniform distribution and skewed data
// based on the CQD.
// 0 -> get RC as if uniformly distributed
// 1-> get RC as if skewed
// anything in between is the linear average
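// Worked example with assumed numbers: rowCount = 1000, accFreq = 0.1
// and noOfStreams = 10 give cardOfFreqValue = 100 and a skewed estimate
// of (1000 - 100)/10 + 100 = 190 rows, versus 100 rows per stream under
// a uniform distribution; with HIST_SKEW_COST_ADJUSTMENT = 0.5 the
// blended estimate is 0.5 * 190 + 0.5 * 100 = 145 rows for the busiest
// stream.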
CostScalar uniformDistRowCountPerStream = rowCount / noOfStreams;
CostScalar histSkewAdjustment =
(ActiveSchemaDB()->getDefaults()).getAsDouble(HIST_SKEW_COST_ADJUSTMENT);
maxCardPerStream =
(maxCardPerStream * histSkewAdjustment) +
(uniformDistRowCountPerStream * (csOne - histSkewAdjustment));
return maxCardPerStream.minCsOne();
}
else
{
// for all other partitions
// return rows per stream
return (rowCount / noOfStreams).minCsOne();
} // for non hash partitioning functions
} // ColStatDescList::getCardOfBusiestStream
//------------------------------------------------------------------
// Determine if the join predicates consist of column sets where one
// of the sides is unique and shares relationship "similar" to PK/FK
// with the other side. If so, cardinality adjustment will be done
// while merging ColStatDescs in ColStatDesc::mergeColStatDesc method.
// This feature is OFF by default if there are more than one joining
// columns. It'll be controlled by COMP_BOOL_149
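// Illustrative example (hypothetical PK/FK pair): joining ORDERS.custkey
// to CUSTOMER.custkey, where CUSTOMER.custkey is unique and every
// custkey in ORDERS also appears in CUSTOMER. The non-unique side
// (ORDERS) is contained in the unique side, so the join cardinality can
// be approximated by the rowcount of ORDERS, which is the value this
// method returns for the merge step to use.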
//------------------------------------------------------------------
CostScalar MultiColumnUecList::getRowcountOfNonUniqueColSet( const Join *expr,
ValueIdList colList1,
ValueIdList colList2,
NABoolean list1Unique,
NABoolean list2Unique)
{
CostScalar rowcountOfNonUniqSide = csMinusOne;
// We use the uniqueness characteristic for Joins only, at least until R2.5 (5/29/09)
if (!expr || !expr->isAnyJoin())
return rowcountOfNonUniqSide;
// We will keep a copy of the set too, as these are more efficient
// implementations for lookup etc.
// all variables suffixed by "1" correspond to column list 1 (colList1) and
// all suffixed by "2" correspond to colList2. There is no concept of "left"
// and "right" here since we don't know which child of Join these belong to
ValueIdSet colSet1 = colList1;
ValueIdSet colSet2 = colList2;
// indexes into the colStats list
CollIndex i;
// multi column UECs
CostScalar MCUec1 = csMinusOne, MCUec2 = csMinusOne;
// Use a cushion to account for the fuzziness that could have been
// introduced by sampling
CostScalar uecCushion ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_4));
MCUec1 = this->lookup(colSet1);
MCUec2 = this->lookup(colSet2);
// if the multi-col UEC does not exist, then return; we cannot use it to check
// for containment, hence cannot use the shortcut MC UEC calculations
// Since we have already added the MCUEC of the unique side, that should always be there
// We could still have MCUEC for non-unique side missing
if ((MCUec1 <= 0) || (MCUec2 <= 0))
return rowcountOfNonUniqSide;
// the uniqueness and containment are checked on the histograms prior to this join,
// hence get the output logical properties of the children.
// Since we don't know which column list belongs to which child of the join, we
// look for the histograms of the columns in the histogram lists of both children.
// Create a completeList of histograms from the histograms of the left and the right children
ColStatDescList completeList = ((Join *)expr)->child(0).outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP))->colStats();
ColStatDescList rightColStatList = ((Join *)expr)->child(1).outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP))->colStats();
// form a complete list of histograms from both sides
completeList.makeDeepCopy(rightColStatList);
// histograms for the joining columns of col list 1 and 2
ColStatsSharedPtr colStats1 = NULL;
ColStatsSharedPtr colStats2 = NULL;
// if colList1 is unique, then we will look for containment, only if
// columnList2 or the non-unique side is contained in the unique side
if(list1Unique)
{
if( (MCUec1 * uecCushion) < MCUec2)
{
// although column set 1 is unique, the non-unique set is not contained in the
// unique side. Check to see whether each non-unique column is individually
// contained in the unique side
for(i =0; i<colList1.entries(); i++)
{
colStats1 = completeList.getColStatsPtrForColumn(colList1[i]);
// get the histogram pointer for the column in list 1
if (colStats1 == NULL)
{
// if the histogram is missing in both children, then we cannot use
// uniqueness property, so return. Setting the flags to FALSE instead of
// returning -1, just so we have one point of exit.
list1Unique = FALSE;
list2Unique = FALSE;
break;
}
// found histogram for column1, now look for column 2 histogram
colStats2 = completeList.getColStatsPtrForColumn(colList2[i]);
if (colStats2 == NULL)
{
// histogram is not present with either of the children of Join, so return -1
// Setting the flags to FALSE instead of
// returning -1, just so we have one point of exit.
list1Unique = FALSE;
list2Unique = FALSE;
break;
}
if ((colStats1->getBaseUec() *uecCushion) <= colStats2->getBaseUec())
{
// For the uniqueness condition to be satisfied, non-unique side should be
// contained in the unique side. In this case, the non-unique is larger than
// the unique, hence we cannot use this containment. Look for the column 2
// uniqueness and containment feature
list1Unique = FALSE;
break;
}
}
}
}
if(list1Unique)
{
ColStatsSharedPtr colStatsOfNonUniqSide = completeList.getColStatsPtrForColumn(colList2[0]);
if(colStatsOfNonUniqSide)
rowcountOfNonUniqSide = colStatsOfNonUniqSide->getRowcount();
else
rowcountOfNonUniqSide = csMinusOne;
}
// columns from side1 are non-unique check if the columns from other side
// constitute a unique set
if(!list1Unique && list2Unique)
{
// list 2 is unique by semantics; check for containment
if( (MCUec2 * uecCushion) < MCUec1)
{
// although column set 2 is unique, the non-unique set is not contained in the
// unique side. Check to see if individually each non-unique column is contained
// in the unique side
for(i =0; i<colList2.entries(); i++)
{
colStats2 = completeList.getColStatsPtrForColumn(colList2[i]);
if (colStats2 == NULL)
{
// histogram is missing, so don't bother going forward, just return
list2Unique = FALSE;
break;
}
// histogram for the list 2 column found, now look for the corresponding histogram in list 1
colStats1 = completeList.getColStatsPtrForColumn(colList1[i]);
if (colStats1 == NULL)
{
list2Unique = FALSE;
break;
}
if ((colStats2->getBaseUec() * uecCushion) <= colStats1->getBaseUec())
{
// if the non-unique side is not contained in the unique side, we cannot use
// the uniqueness feature
list2Unique = FALSE;
break;
}
}
} // if left side is contained in the right side
if(list2Unique)
{
ColStatsSharedPtr colStatsOfNonUniqSide = completeList.getColStatsPtrForColumn(colList1[0]);
if(colStatsOfNonUniqSide)
rowcountOfNonUniqSide = colStatsOfNonUniqSide->getRowcount();
else
rowcountOfNonUniqSide = csMinusOne;
}
}
return rowcountOfNonUniqSide;
}
void
ColStatDescList::computeRowRedFactor(MergeType mergeMethod, // input
CollIndex numOuterColStats, // input
CostScalar rowcountBeforePreds, // input
CollIndex & predCountSC, // output
CollIndex & predCountMC, // output
CostScalar & rowRedProduct) // output
{
// If this is a semi-join, it should be the case that only columns
// in the outer table need to be scaled to the proper cardinality.
CollIndex loopLimit = ( ( mergeMethod == SEMI_JOIN_MERGE ||
mergeMethod == ANTI_SEMI_JOIN_MERGE ) ?
numOuterColStats : entries() );
// need to calculate the product of the row reduction factors
CollIndex i;
CostScalar rowCnt;
// do floor of rowcount before preds to take into account small rounding errorrs that
// may creep up due to cost scalar arithmetic. Use this adjusted value for computing
// predCount for MC adjustments only
CostScalar tempRCBeforePreds = floor(rowcountBeforePreds.getValue());
for ( i = 0; i < loopLimit; i++ )
{
ColStatsSharedPtr tempColStats = (*this)[i]->getColStats();
rowCnt = tempColStats->getRowcount();
if (( rowCnt < rowcountBeforePreds ) || (tempColStats->isARollingColumn()))
{
predCountSC++;
rowRedProduct *= rowCnt / rowcountBeforePreds;
}
// do a ceil of row count from histograms to take into account
// any rounding issues that may creep up due to cost scalar arithmetic
// example:
// if rowCnt = 1.04 and rowcountBeforePreds = 1 then tempRC = 2 and tempRCBeforePreds = 1 -> do not increment predCountMC
// if rowCnt = 1 and rowcountBeforePreds = 1 then tempRC = 1 and tempRCBeforePreds = 1 -> do not increment predCountMC
// if rowCnt = 1 and rowcountBeforePreds = 1.04 then tempRC = 1 and tempRCBeforePreds = 1 -> do not increment predCountMC
// if rowCnt = 0.99 and rowcountBeforePreds = 1 then tempRC = 1 and tempRCBeforePreds = 1 -> do not increment predCountMC
// if rowCnt = 1 and rowcountBeforePreds = 0.99 then tempRC = 1 and tempRCBeforePreds = 0 -> do not increment predCountMC
// if rowCnt = 1.04 and rowcountBeforePreds = 0.99 then tempRC = 2 and tempRCBeforePreds = 0 -> do not increment predCountMC
// if rowCnt = 0.99 and rowcountBeforePreds = 1.04 then tempRC = 1 and tempRCBeforePreds = 1 -> do not increment predCountMC
// if rowCnt = 1 and rowcountBeforePreds = 2 then tempRC = 1 and tempRCBeforePreds = 2 -> increment predCountMC
// if rowCnt = 1.04 and rowcountBeforePreds = 1.9 then tempRC = 2 and tempRCBeforePreds = 1 -> do not increment predCountMC
CostScalar tempRC = ceil(rowCnt.getValue());
if (tempRC < tempRCBeforePreds)
predCountMC++;
}
// -----------------------------------------------------------------------
// in the case of an OUTER_JOIN_MERGE, we need to now undo part of that
// reduction
// -----------------------------------------------------------------------
if ( mergeMethod == OUTER_JOIN_MERGE )
{
CostScalar rowred;
ValueIdSet alreadyMergedHistograms;
for( i = 0; i < numOuterColStats; i++ )
{
alreadyMergedHistograms.insert( (*this)[i]->getMergeState() );
}
for( i = numOuterColStats; i < loopLimit; i++ )
{
if( alreadyMergedHistograms.contains( (*this)[i]->getColumn() ) )
{
rowred = csOne;
if ( rowcountBeforePreds.isGreaterThanZero() /* > csZero */ )
{
rowred =
(*this)[i]->getColStats()->getRowcount() / rowcountBeforePreds;
}
rowRedProduct /= rowred ; // remove this reduction, it's a duplicate
}
}
}
}
void ColStatDescList::insertByPosition(const StatsList & other,
const NAColumnArray &columnList,
const ValueIdList &tableColList)
{
for (CollIndex i = 0; i < columnList.entries(); i++)
{
NAColumn * column = columnList[i];
short position = (short) column->getPosition();
for(UInt32 i = 0; i < other.entries(); i++)
{
ColStatsSharedPtr otherStats(other[i]);
const NAColumnArray &otherColumns = otherStats->getStatColumns();
// Skip to the next ColStats if these stats don't contain
// this column position.
// position is checked for only single column histograms
if ( (otherColumns.entries() == 1) &&
(!otherColumns.getColumnByPos(position)) )
continue;
ColStatDescSharedPtr tmpStatDesc(new (CmpCommon::statementHeap())
ColStatDesc (other[i], tableColList), CmpCommon::statementHeap());
if (otherColumns.entries() == 1)
{
// if the histogram requires only a single interval, compress it before inserting
// it into the colStats list
NAColumnArray tempArray = other[i]->getStatColumns();
if ((tempArray.entries() == 1) && (tempArray[0]->isReferencedForSingleIntHist() ) )
{
// only for single column histograms. If only single histogram
// is needed insert the compressed copy of the histogram, else
// insert the full histogram
tmpStatDesc->getColStatsToModify()->compressToSingleInt();
}
}
this->insertAt(this->entries(), tmpStatDesc);
break;
}
} // for each column in the column array
}
// This method traverses the histograms and returns the highest UEC
// reduction applied by local predicates among the columns in the
// input valueid set. This information is used during multi-col UEC
// adjustment for join cardinality estimation
CostScalar
ColStatDescList::getHighestUecReductionByLocalPreds(ValueIdSet &cols) const
{
CostScalar highestUecReductionByLocalPreds = csOne;
ColStatsSharedPtr tempColStat;
for (ValueId vid = cols.init(); cols.next(vid); cols.advance(vid))
{
// Reduction from columns for which histogram is not present can be ignored
tempColStat = this->getColStatsPtrForColumn(vid);
if (!tempColStat)
continue;
CostScalar uecReductionFromLocalPreds = (tempColStat->getBaseUec()/tempColStat->getUecBeforePreds()).maxCsOne();
// Take the highest reduction, that is the one giving least number of rows out
if(uecReductionFromLocalPreds < highestUecReductionByLocalPreds)
highestUecReductionByLocalPreds = uecReductionFromLocalPreds;
}
return highestUecReductionByLocalPreds;
}
// ---------------------------------------------------------------------------
// This is a helper method used while computing left joins. It locates
// histograms that have been joined to the right child using an inner join
// and now need to be null augmented to simulate the left join
// ---------------------------------------------------------------------------
NABoolean
ColStatDescList::locateHistogramToNULLAugment(ValueIdSet EqLocalPreds /*in*/,
NAList<CollIndex> &statsToMerge /*out*/,
CollIndex &rootStatIndex /*out*/,
CollIndex outerRefCount /*in*/)
{
// Contains the maximum of number of intervals from amongst the left
// histograms that participated in equijoins and would be null instantiated
// Example for query like T1 left join T2 on T1.a = T2.a and T1.b = T2.b
// If the resultant histograms after join on column 'a' contains 50 intervals
// and the resultant histograms after join on column 'b' contains 60
// intervals, the optimizer will choose index for column 'b' and return that
// in rootStatIndex. saveNumBuckets holds the largest interval count
// seen so far in the iteration.
CollIndex saveNumBuckets = 0;
// contains a flag to indicate whether the histogram that participated
// in the equijoin has been found
NABoolean foundFlag = FALSE;
// --------------------------------------------------------------
// Equijoins allow us to best determine the number of null-
// augmented rows to be added back into the histogram(s) that
// describe the join results.
// First, locate histograms that capture the results of the
// one-or-more equijoins between the inner and outer tables.
// --------------------------------------------------------------
for (ValueId id = EqLocalPreds.init();
EqLocalPreds.next (id);
EqLocalPreds.advance (id))
{
ItemExpr *pred = id.getItemExpr();
// We are interested only in equi join predicates
// skip any other predicates
if (pred->getOperatorType() != ITM_VEG_PREDICATE)
continue;
// look thru all left histograms to find the one that
// participated in this join
// outerRefCount contains the count of left histograms,
// some of which may be the result of inner join
for (CollIndex i = 0; i < (CollIndex)outerRefCount; i++)
{
// We are not interested in considering histograms created for virtual
// columns, such as tuples, for joins so skip to the next histogram
if ((*this)[i]->getColStats()->isVirtualColForHist() )
continue;
// See if the leading column of any histograms reference
// the VEGReference id. Among those that do, remember
// the one with the most intervals.
// That one will be used to calculate how many rows from
// the original outer (left) table did not meet the join
// condition.
NABoolean foundCandidate = FALSE;
ItemExpr * columnFromJoinList = (*this)[i]->getVEGColumn().getItemExpr();
if ( columnFromJoinList->getOperatorType() != ITM_VEG_REFERENCE )
{
// the left columns appear as VEG_REFERENCES in the predicate
// If it is not a VEG_REFERENCE, skip this ColStatDesc and
// continue looping
continue;
}
// Check to see if the column of this histogram participated
// in this equi-join predicate
foundCandidate = pred->containsTheGivenValue(columnFromJoinList->getValueId());
// if it did not, skip to next histogram
if (!foundCandidate)
continue;
ColStatDescSharedPtr statDesc = (*this)[i];
ColStatsSharedPtr colStats = statDesc->getColStats();
// save the index of this histogram, to be merged later
statsToMerge.insert(i);
foundFlag = TRUE;
// Save the histogram with most intervals.
if (( saveNumBuckets == 0 ) || // anything is better than this
(colStats->getHistogram()->entries() > saveNumBuckets )) // for VEG Ref
{
// need this because of TEST014, q14, where an assertion failed
// because rootStatIndex didn't change from its init value of 1000
saveNumBuckets = colStats->getHistogram()->entries() ;
rootStatIndex = i ;
}
} // for outerRefCount
} // for EqLocalPreds
if ( !foundFlag )
{
// --------------------------------------------------------------
// No usable equality predicates found.
// Use alternative approach to guess-timating the outer join's
// impact:
// Examine the histograms whose shape was changed, if any, to
// determine the number of rows from the left that need to be
// null augmented.
// Again, find the one with the greatest number of intervals.
// --------------------------------------------------------------
statsToMerge.clear(); // reset.
for (CollIndex i = 0; i < (CollIndex)outerRefCount; i++)
{
ColStatDescSharedPtr statDesc = (*this)[i];
ColStatsSharedPtr colStats = statDesc->getColStats();
// We are not interested in considering virtual columns for joins
// so skip to the next histogram
if (colStats->isVirtualColForHist() )
continue;
if (colStats->isShapeChanged())
{
statsToMerge.insert (i);
foundFlag = TRUE;
if ((saveNumBuckets == 0) // we should accept any value for
|| (colStats->getHistogram()->entries() > saveNumBuckets))
{ // rootStatIndex if we have none thus far!
saveNumBuckets = colStats->getHistogram()->entries() ;
rootStatIndex = i ;
}
}
} // for outerRefCount
} // if !foundFlag
// if foundFlag is TRUE,
// statsToMerge will contain a list of indexes of the histogram to be merged
// rootStatIndex will contain the index of the histogram of the joining
// column with largest number of intervals
return foundFlag;
}
// --------------------------------------------------------------------
// It is a helper method used by left joins. It merges the rows
// from the left side that did not match the right child back into
// the joined histograms.
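// Illustrative example (assumed numbers): if the inner join for
// T1 LEFT JOIN T2 produced 100 rows and 40 rows of T1 found no match,
// the OR-merge below folds those 40 unmatched, null-instantiated rows
// back in, so the returned left outer join rowcount is about 140.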
// -------------------------------------------------------------------
CostScalar
ColStatDescList::computeLeftOuterJoinRC(NABoolean &foundFlag /*out*/,
const ColStatDescList &leftColStatsList /*in*/,
CollIndex rootStatIndex /*in*/)
{
ColStatDescSharedPtr joinStatDesc;
ColStatDescSharedPtr origStatDesc;
// ------------------------------------------------------------
// Either an equality-predicate or some other shape-changing
// predicate was involved in this join. Determine the number
// of rows that result from merging the original left table
// with the join result.
// ------------------------------------------------------------
// rootStatIndex contains the index of the histogram that participated
// in Join and amongst more than one joining histograms had most number
// of intervals
joinStatDesc = (*this)[rootStatIndex];
ColStatsSharedPtr joinColStats = joinStatDesc->getColStatsToModify();
CostScalar innerJoinRC = joinColStats->getRowcount();
ItemExpr * statExpr = joinStatDesc->getVEGColumn().getItemExpr() ;
OperatorTypeEnum statOper = statExpr->getOperatorType() ;
// sanity check
// We shall not assert for virtual columns, even though later we
// shall not consider them for join
// only the following histograms can participate in left joins
// These could be
// 1. VEG_REFERENCE,
// 2. result of another left or semi-join (ITM_INSTANTIATE_NULL)
// 3. result of UNION (ITM_VALUEIDUNION)
// 4. result of rowset (ITM_UNPACKCOL or ITM_ROWSETARRAY_SCAN)
// 5. IN list of more than COMP_INT_22 elements (ITM_NATYPE)
// For any other column type, flag the error and return the inner
// join rowcount as the final left join RC
if (NOT ( statOper == ITM_VEG_REFERENCE ||
statOper == ITM_INSTANTIATE_NULL ||
statOper == ITM_VALUEIDUNION ||
statOper == ITM_UNPACKCOL ||
statOper == ITM_ROWSETARRAY_SCAN ||
joinColStats->isVirtualColForHist() ||
statOper == ITM_NATYPE) )
{
CCMPASSERT ( "Incorrect expression participating in Join") ;
// join result is not reliable any more
joinColStats->setFakeHistogram(TRUE);
// set the foundFlag to FALSE to indicate some error happened
// so the row counts can be set appropriately
foundFlag = FALSE;
// in case of an error return the inner join row count as the final rowcount
// from join.
return (innerJoinRC);
}
// ------------------------------------------------------------
// Once we have verified the correctness of the histogram
// find the matching entry in the leftColStatsList....
// It doesn't have to be there, but it should be.
// ------------------------------------------------------------
NABoolean goodMatch = FALSE;
for (CollIndex i = 0; i < leftColStatsList.entries(); i++)
{
// Skip any histograms created for virtual columns
if (leftColStatsList[i]->getColStats()->isVirtualColForHist())
continue;
origStatDesc = leftColStatsList[i];
ItemExpr * originalExpr = origStatDesc->getVEGColumn().getItemExpr() ;
OperatorTypeEnum originalOper = originalExpr->getOperatorType() ;
if (originalOper == ITM_ROWSETARRAY_SCAN)
continue;
// sanity check for the original left histogram
if (NOT ( originalOper == ITM_VEG_REFERENCE ||
originalOper == ITM_INSTANTIATE_NULL ||
originalOper == ITM_VALUEIDUNION ||
originalOper == ITM_UNPACKCOL ||
originalOper == ITM_NATYPE) )
{
CCMPASSERT ( "Incorrect expression participating in Join") ;
// join result is not reliable any more
joinColStats->setFakeHistogram(TRUE);
// set the foundFlag to FALSE to indicate some error happened
// so the row counts can be set appropriately
foundFlag = FALSE;
// in case of an error return the inner join row count as the final rowcount
// from join.
return (innerJoinRC);
}
if ( originalExpr == statExpr ) // this is a ValueId comparison
{
if (originalOper != statOper)
{
// sanity check to ensure that the histogram type was not modified during the inner join
CCMPASSERT( FALSE && "Two mismatched expressions being joined" ) ; // reaching here should never happen
// join result is not reliable any more
joinColStats->setFakeHistogram(TRUE);
// set the foundFlag to FALSE to indicate some error happened
// so the row counts can be set appropriately
foundFlag = FALSE;
// in case of an error return the inner join row count as the final rowcount
// from join.
return (innerJoinRC);
}
goodMatch = TRUE ;
break ;
}
} // for loop over leftColStatsList
if (goodMatch)
{
// if all conditions are satisfied, do an OR merge of the join result
// with the original left histograms. This adds the rows from
// the left histograms that did not match the right side
joinStatDesc->mergeColStatDesc( origStatDesc,
LEFT_JOIN_OR_MERGE,
FALSE,
REL_JOIN,
FALSE/*dontMergeFVs*/);
// In estimateCardinality, we ensure that the result of a
// join is at least one row. When the true result is zero,
// mergeColStatDesc may revert the rowcount back to 0; because
// we want the minimum cardinality to be one, we uplift it
// to one once again.
CostScalar oJoinResultRows = joinColStats->getRowcount();
if (oJoinResultRows < csOne)
{
joinColStats->setRowsAndUec (csOne, csOne);
oJoinResultRows = csOne;
}
// return the resultant rowcount: the result of the
// inner join plus the number of unmatched rows from the left side
foundFlag = TRUE;
return oJoinResultRows;
}
else
{
// Original histogram for the joined histogram not found.
foundFlag = FALSE;
// return the inner join rowcount as the final rowcount
return innerJoinRC;
}
}
// --------------------------------------------------------------------
// This is a helper method used by full outer joins. It merges the rows
// from the right side that did not match the left child back into
// the joined histograms.
// -------------------------------------------------------------------
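//
// Illustrative arithmetic (hypothetical numbers): if the join result
// so far contains 60 rows and 10 rows from the other side found no
// match, the OR merge below adds those 10 rows back, returning 70.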
CostScalar
ColStatDescList::computeFullOuterJoinRC(NABoolean &foundFlag /*out*/,
const ColStatDescList &origColStatsList /*in*/,
CollIndex rootStatIndex /*in*/)
{
ColStatDescSharedPtr joinStatDesc;
ColStatDescSharedPtr origStatDesc;
// ------------------------------------------------------------
// Either an equality-predicate or some other shape-changing
// predicate was involved in this join. Determine the number
// of rows that result from merging the original histograms
// with the join result.
// ------------------------------------------------------------
// rootStatIndex contains the index of the histogram that participated
// in the join and, amongst the joining histograms, had the largest
// number of intervals
joinStatDesc = (*this)[rootStatIndex];
ColStatsSharedPtr joinColStats = joinStatDesc->getColStatsToModify();
CostScalar innerJoinRC = joinColStats->getRowcount();
// since the right side has already been merged into the left, the right
// column reference should appear in the merge state of the joined result
ValueIdSet mergedSet = joinStatDesc->getMergeState();
// ------------------------------------------------------------
// Once we have verified the correctness of the histogram,
// find the matching entry in origColStatsList....
// It doesn't have to be there, but it should be.
// ------------------------------------------------------------
NABoolean goodMatch = FALSE;
for (CollIndex i = 0; i < origColStatsList.entries(); i++)
{
// Skip any histograms created for virtual columns
if (origColStatsList[i]->getColStats()->isVirtualColForHist())
continue;
origStatDesc = origColStatsList[i];
ValueId originalCol = origStatDesc->getColumn();
OperatorTypeEnum originalOper = originalCol.getItemExpr()->getOperatorType() ;
if (originalOper == ITM_ROWSETARRAY_SCAN)
continue;
if ( mergedSet.containsTheGivenValue(originalCol)) // this is a ValueId comparison
{
goodMatch = TRUE ;
break ;
}
} // for loop over origColStatsList
if (goodMatch)
{
// if all conditions are satisfied, do an OR merge of the join result
// with the original histograms. This adds back the rows that
// did not match during the join
joinStatDesc->mergeColStatDesc( origStatDesc,
LEFT_JOIN_OR_MERGE,
FALSE,
REL_JOIN,
FALSE/*dontMergeFVs*/);
// In estimateCardinality, we ensure that the result of a
// join is at least one row. When the true result is zero,
// mergeColStatDesc may revert the rowcount back to 0; because
// we want the minimum cardinality to be one, we uplift it
// to one once again.
CostScalar oJoinResultRows = joinColStats->getRowcount();
if (oJoinResultRows < csOne)
{
joinColStats->setRowsAndUec (csOne, csOne);
oJoinResultRows = csOne;
}
// return the resultant rowcount: the result of the
// inner join plus the number of unmatched rows
foundFlag = TRUE;
return oJoinResultRows;
}
else
{
CCMPASSERT("Original histogram for the joined histogram in full outer join not found");
// Original histogram for the joined histogram not found.
foundFlag = FALSE;
// return inner join RC as the final rowcount
return innerJoinRC;
}
}
// ----------------------------------------------------------------------------------
// This is a helper method used by left joins.
// The method is called after the rows from the left histograms of the joining
// column that did not match the right side have been merged back into the join
// result. In this method, the histograms of the remaining columns are
// synchronized to have the same rowcount.
// --------------------------------------------------------------------------------
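//
// Illustrative example (hypothetical numbers): if the outer join
// result was estimated at 170 rows, a non-joining column whose
// histogram still shows 150 rows is scaled up via synchronizeStats()
// so that every histogram in the list reports the same 170 rows.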
void
ColStatDescList::synchronizeHistsWithOJRC(NAList<CollIndex> &statsToMerge /*in */,
CollIndex startIndex /*in*/,
CollIndex stopIndex /*in*/,
CollIndex rootStatIndex /*in*/,
const ColStatDescList &origColStatsList /*in*/,
CostScalar &oJoinResultRows /*out*/,
CostScalar &baseRows /*out*/)
{
// ---------------------------------------------------------------
// Force all histograms to have the rowcount predicted previously:
// either by mergeColStatDesc, or by the sWAG.
//
// Examine each histogram from the left table. If it had a shape-
// changing predicate applied, OR the join result with the original
// histogram for that column, as in the root case.
// Otherwise, just change its reduction factor to scale it to
// the sWAG'ed result size.
// ---------------------------------------------------------------
CollIndex i = 0;
for (i = startIndex; i < stopIndex; i++)
{
// indicates whether this histogram had a predicate applied to it
NABoolean inPredicate = FALSE;
// indicates whether we were able to find a match for the joined
// histogram from the list of original histograms
NABoolean goodMatch = FALSE;
// The histogram with rootStatIndex has already been merged
// so skip that
if ( rootStatIndex == i )
continue ;
// See if this column was involved in a predicate.
// statsToMerge contains the indexes of all histograms
// that participated in the join; break out of the loop
// as soon as a match is found.
CollIndex j = 0;
for (j = 0; j < statsToMerge.entries() && !inPredicate; j++)
{
if ( i == statsToMerge[j] )
inPredicate = TRUE;
}
// ------------------------------------------------------
// If involved in a predicate: do a merge-based calculation.
// And, in either case, tweak the resulting rowcount to
// match that predicted above.
// ------------------------------------------------------
ColStatDescSharedPtr jStatDesc = (*this)[i];
ColStatsSharedPtr jColStats = jStatDesc->getColStatsToModify();
// if the histogram is for virtual columns, or from rowsets,
// skip the merge-based calculation and simply synchronize the
// histogram with the rowcount computed earlier.
if (jColStats->isVirtualColForHist() )
{
CostScalar oldCount = jColStats->getRowcount();
if (oldCount != oJoinResultRows)
jStatDesc->synchronizeStats (oldCount, oJoinResultRows);
continue;
}
ItemExpr * statExpr = jStatDesc->getVEGColumn().getItemExpr() ;
OperatorTypeEnum statOper = statExpr->getOperatorType() ;
if (statOper == ITM_ROWSETARRAY_SCAN)
{
CostScalar oldCount = jColStats->getRowcount();
if (oldCount != oJoinResultRows)
jStatDesc->synchronizeStats (oldCount, oJoinResultRows);
continue;
}
// sanity check to see if we have a correct column type
// participating in the left join. If not, synchronize this
// histogram with the rowcount obtained so far (the inner join
// result plus the original left rows merged back in) and
// continue with the next histogram
if (NOT(statOper == ITM_VEG_REFERENCE ||
statOper == ITM_INSTANTIATE_NULL ||
statOper == ITM_VALUEIDUNION ||
statOper == ITM_UNPACKCOL ||
statOper == ITM_NATYPE) )
{
CCMPASSERT ( "Incorrect column type participating in Join") ;
// join result is not reliable any more
jColStats->setFakeHistogram(TRUE);
// don't assert for customers, synchronize histograms with
// left rowcount and continue
CostScalar oldCount = jColStats->getRowcount();
if (oldCount != oJoinResultRows)
jStatDesc->synchronizeStats (oldCount, oJoinResultRows);
continue;
}
// --------------------------------------------------------
// find the matching entry in origColStatsList....
// It doesn't have to be there, but it should be.
// --------------------------------------------------------
CollIndex matchPoint = 0;
for (j = 0; j < origColStatsList.entries() ; j++)
{
ColStatDescSharedPtr oStatDesc = origColStatsList[j];
ColStatsSharedPtr oColStats = oStatDesc->getColStats();
// skip any histograms created for virtual columns; we do not
// want to join on these. Continue, so that the joined histogram
// can be synchronized with the previous joined result
if (oColStats->isVirtualColForHist() )
continue;
ItemExpr * originalExpr = oStatDesc->getVEGColumn().getItemExpr() ;
OperatorTypeEnum originalOper = originalExpr->getOperatorType() ;
// skip any histograms created for rowsets; we do not
// want to join on these. Continue, so that the joined histogram
// can be synchronized with the previous joined result
if (originalOper == ITM_ROWSETARRAY_SCAN)
continue;
// sanity check
if (NOT( originalOper == ITM_VEG_REFERENCE ||
originalOper == ITM_INSTANTIATE_NULL ||
originalOper == ITM_VALUEIDUNION ||
originalOper == ITM_UNPACKCOL ||
originalOper == ITM_NATYPE ))
{
CCMPASSERT ( "Incorrect expression participating in Join") ;
// We found an incorrect column type in the list of histograms
// assert for debug, and skip for release compiler
continue;
}
if (statExpr == originalExpr) // ValueId comparison
{
if (statOper != originalOper)
{
CCMPASSERT( "Mismatched expressions being joined" ) ; // no reason this should fail
// We found an incorrect column type in the list of histograms
// assert for debug, and skip for release compiler
continue;
}
goodMatch = TRUE;
matchPoint = j;
break ;
}
} // all entries of leftColStatsList
if (goodMatch)
{
ColStatDescSharedPtr oStatDesc = origColStatsList[matchPoint];
ColStatsSharedPtr oColStats = oStatDesc->getColStats();
if ( inPredicate )
{
jStatDesc->mergeColStatDesc (oStatDesc,
LEFT_JOIN_OR_MERGE,
FALSE,
REL_JOIN);
CostScalar oldCount = jColStats->getRowcount();
if (oldCount != oJoinResultRows)
jStatDesc->synchronizeStats (oldCount, oJoinResultRows);
}
else
{
// apply change in selectivity from the predicate to
// all histograms not in the predicate. Only the
// rowcount reduction factor is changed
jStatDesc->synchronizeStats (baseRows,
oJoinResultRows,
ColStatDesc::DO_NOT_REDUCE_UEC) ;
}
}
else
{
CCMPASSERT( "Can't find the left histogram to associate in outer join" ) ;
// somehow can't find a left entry to associate... should
// not happen, but if it does....
CostScalar oldCount = jColStats->getRowcount();
if (oldCount != oJoinResultRows)
jStatDesc->synchronizeStats (oldCount, oJoinResultRows);
}
} // for i from startIndex to stopIndex
}
// -----------------------------------------------------------------------
// This is a helper method for left joins. It is used to
// NULL-instantiate the histograms from the right side, augmenting
// them with NULLs for the left rows that found no match
// ------------------------------------------------------------------------
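//
// Illustrative example (hypothetical numbers): if the outer join
// result is 170 rows and a right-side histogram accounts for only
// 150 of them, nullAugmentHistogram() below adds 20 NULLs so that
// the histogram also totals 170 rows.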
void
ColStatDescList::nullInstantiateHists(CollIndex startIndex,
CollIndex stopIndex,
CostScalar &oJoinResultRows,
ValueIdList &nulledVIds)
{
// ------------------------------------------------------------
// Next, instantiate NULLs in the histograms from the right table.
// Also, alter the histograms' descriptions of themselves so
// that they indicate that they are null-instantiated.
// ------------------------------------------------------------
// Histograms from the right table start at the outerRefCount index
for (CollIndex i = startIndex; i < stopIndex; i++)
{
ColStatDescSharedPtr jStatDesc = (*this)[i];
ColStatsSharedPtr jColStats = jStatDesc->getColStatsToModify();
// before computing the number of NULLs to add to the histogram,
// apply any remaining reduction factor to the intervals
jColStats->scaleHistogram(1);
// Skip any histograms created for virtual columns
if (jColStats->isVirtualColForHist())
continue;
// $$$ BETA_NOT_FCS
// $$$ kludge to fix genesis case 10-980224-5150
// $$$ (this should never happen!)
if ( oJoinResultRows < jColStats->getRowcount() )
{
jStatDesc->synchronizeStats (jColStats->getRowcount(), // baseRows
oJoinResultRows, // newRows
ColStatDesc::DO_NOT_REDUCE_UEC) ;
jColStats = jStatDesc->getColStatsToModify() ;
}
// $$$ BETA_NOT_FCS
jColStats->nullAugmentHistogram(oJoinResultRows);
ItemExpr *pred = jStatDesc->VEGColumn().getItemExpr();
for (CollIndex j=0; j < nulledVIds.entries() ; j++)
{
ItemExpr *nulledExpr = nulledVIds[j].getItemExpr();
if (nulledExpr->getOperatorType() != ITM_INSTANTIATE_NULL)
{
CCMPASSERT(nulledExpr->getOperatorType() == ITM_INSTANTIATE_NULL);
continue;
}
// NULL-instantiate only those histograms which have not participated
// in any join; the reason is that we do not keep the original left histograms
ValueId nulledValueId = nulledExpr->child(0).getValueId();
if (pred->getValueId() == nulledValueId)
{
// overwrite column-stats VEGColumn()'s valueId
// with the valueId of the Instantiate Null
jStatDesc->VEGColumn() = nulledVIds[j];
// we need to update the merge state of these InstantiateNulls,
// because otherwise we will get into a rut when we hit
// ColStatDesc::mergeColStatDesc() -- similar to the situation
// with ValueIdUnions, handled in Union::synthEstLogProp().
// Replace the base column in the merge state with the null-
// instantiated column, to reflect that the histogram is now
// null-instantiated.
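// Illustrative example (hypothetical ValueIds): a merge state of
// { T1.a, T2.b } becomes { T1.a, InstantiateNull(T2.b) } once T2.b
// is null-instantiated; only the nulled column is replaced.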
if (jStatDesc->mergeState().entries() ==1)
jStatDesc->mergeState().clear();
else
{
// remove the corresponding column from the merge state and replace it
// with the null-instantiated one. The rest of the merge state is not impacted.
ValueIdSet mergeState = jStatDesc->mergeState();
// the merge state contains either base columns or null-instantiated
// column references. Hence, if we don't find the nulledValueId directly,
// get the base column from it and compare that
if ((!mergeState.contains(nulledValueId)) &&
(nulledValueId.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE))
{
mergeState.removeCoveredVIdSet((ValueIdSet) nulledValueId);
}
else
mergeState.remove(nulledValueId);
jStatDesc->mergeState().clear();
jStatDesc->mergeState().insert(mergeState);
}
jStatDesc->mergeState().insert( nulledExpr->getValueId() ) ;
break ;
// what, if anything, should be done with the raw column
// list associated with this col stat desc?? cmf
}
} // for j - nullVIds
} // for i - outerRefCount -> joinStatDescList
}
ULng32
MultiColumnSkewedValueLists::HashFunction (const ValueIdList & input)
{
ULng32 retval = 1 + input.entries();
for (CollIndex i=0; i < input.entries(); i++)
{
retval += (CollIndex) input[i]; // add up the ValueId's
}
return retval ;
}
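// Note that summing the ValueIds makes this hash insensitive to the
// order of the list: e.g. (hypothetical ValueIds) the lists (10, 12)
// and (12, 10) both hash to 1 + 2 + 10 + 12 = 25. This suits the
// set-style comparison done in getMCSkewedValueList() below.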
MultiColumnSkewedValueLists::MultiColumnSkewedValueLists () :
HASHDICTIONARY(ValueIdList,MCSkewedValueList) (&(MultiColumnSkewedValueLists::HashFunction),
5,
TRUE,
HISTHEAP )
{}
// -----------------------------------------------------------------------
// MultiColumnSkewedValueLists :: constructor
//
// This class is modeled after the MultiColumnUecList class and mirrors
// its interface and usage
// -----------------------------------------------------------------------
MultiColumnSkewedValueLists::MultiColumnSkewedValueLists (const StatsList & initStats,
const ValueIdList & tableColumns) :
HASHDICTIONARY(ValueIdList,MCSkewedValueList) (&(MultiColumnSkewedValueLists::HashFunction),
5,
TRUE,
HISTHEAP )
{
// loop through the list of NAColumnArray's
for ( CollIndex i = 0; i < initStats.groupUecColumns_.entries(); i++ )
{
const NAColumnArray & cols = initStats.groupUecColumns_[i];
if(cols.entries() == 1)
continue;
ValueIdList groupCols;
for ( CollIndex j = 0; j < cols.entries(); j++ )
{
Lng32 position = cols[j]->getPosition();
const ValueId & id = tableColumns[position];
groupCols.insert( id );
}
ValueIdList * key = new (HISTHEAP) ValueIdList( groupCols );
MCSkewedValueList * value = new (HISTHEAP) MCSkewedValueList (initStats.groupMCSkewedValueLists_[i], HISTHEAP);
insert( key, value );
}
}
const MCSkewedValueList * MultiColumnSkewedValueLists::getMCSkewedValueList(ValueIdSet columns, ValueIdList &colGroup)
{
ValueIdList * keyEntry = NULL;
MCSkewedValueList * valueEntry = NULL;
CollIndex noOfCols = columns.entries();
// we need to iterate through all entries in this list
MultiColumnSkewedValueListsIterator iter( *this );
for ( iter.getNext( keyEntry, valueEntry );
keyEntry != NULL && valueEntry != NULL;
iter.getNext( keyEntry, valueEntry ) )
{
if(noOfCols != (*keyEntry).entries())
continue;
if(columns == ValueIdSet(*keyEntry))
{
colGroup = *keyEntry;
return valueEntry;
}
}
return NULL;
}
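// A minimal usage sketch (hypothetical caller and variable names):
//
// ValueIdList colGroup;
// const MCSkewedValueList *list =
// mcSkewedValueLists()->getMCSkewedValueList(predCols, colGroup);
// if (list != NULL) { /* colGroup holds the stored column order */ }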
const MCSkewedValueList * ColStatDescList::getMCSkewedValueListForCols(ValueIdSet inputCols, ValueIdList &colGroup)
{
if(mcSkewedValueLists_)
return mcSkewedValueLists()->getMCSkewedValueList(inputCols, colGroup);
else
return NULL;
}
CostScalar ColStatDescList::getAvgRowcountForNonSkewedMCValues(ValueIdSet cols, MCSkewedValueList* mCSkewedValueList)
{
CostScalar avgRowcountForNonSkewedMCValues = 1;
if(mCSkewedValueList && getUecList())
{
CostScalar mcUecForCols = getUecList()->lookup(cols);
CollIndex noOfMCSkewValues = mCSkewedValueList->entries();
if(mcUecForCols - noOfMCSkewValues > csZero)
{
CostScalar totalRowcountOfMCSkewedValues = csZero; // explicit zero init
for(CollIndex i=0; i<noOfMCSkewValues; i++)
totalRowcountOfMCSkewedValues += mCSkewedValueList->at(i)->getFrequency();
CostScalar totalRowcount = (*this)[0]->getColStats()->getRowcount();
avgRowcountForNonSkewedMCValues = (totalRowcount - totalRowcountOfMCSkewedValues)/(mcUecForCols - noOfMCSkewValues);
}
}
return avgRowcountForNonSkewedMCValues;
}
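//
// Illustrative arithmetic (hypothetical numbers): with a total
// rowcount of 1000, 4 skewed values accounting for 400 rows, and a
// multi-column UEC of 100, the non-skewed values average
// (1000 - 400) / (100 - 4) = 6.25 rows each.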
// eof