/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File:         OptItemExpr.C
* Description:  Item expressions (optimizer-related methods)
* Language:     C++
*
*
*
*
******************************************************************************
*/

// -----------------------------------------------------------------------

#include "Sqlcomp.h"
#include "GroupAttr.h"
#include "AllItemExpr.h"
#include "Cost.h"         /* for lookups in the defaults table */


// -----------------------------------------------------------------------
//  Methods for ItemExpr
// -----------------------------------------------------------------------
NABoolean ItemExpr::isAnEquiJoinPredicate
                       (const GroupAttributes* const leftGroupAttr,
			const GroupAttributes* const rightGroupAttr,
			const GroupAttributes* const joinGroupAttr,
			ValueId & leftChildValueId,
			ValueId & rightChildValueId,
			NABoolean & isOrderPreserving) const
{
  // This expression MUST be a binary predicate expression.
  if (NOT isAPredicate())
    return FALSE;

  NABoolean isAnEquiJoin = FALSE;

  // Check for equality predicates
  if (getOperatorType() == ITM_EQUAL)
    {
      // a join predicate of the form col1 = col2 or similar
      ItemExpr * leftExpr  = child(0);
      ItemExpr * rightExpr = child(1);

      leftChildValueId  = leftExpr->getValueId();
      rightChildValueId = rightExpr->getValueId();

      // the two sides of the comparison operator have to be supplied
      // by the two different child  nodes
      if (CmpCommon::getDefault(COMP_BOOL_113) == DF_OFF)
        {
          // fix to soln 10-080604-3651 is to relax this test from 
          // "isCharacteristicOutput()" to "covers"
          const ValueIdSet &inputs = joinGroupAttr->getCharacteristicInputs();
          ValueIdSet refs;
          if (leftGroupAttr->covers(leftChildValueId, inputs, refs) AND
              rightGroupAttr->covers(rightChildValueId, inputs, refs))
            {
              isAnEquiJoin = TRUE;
            }
          else if (leftGroupAttr->covers(rightChildValueId, inputs, refs) AND
                   rightGroupAttr->covers(leftChildValueId, inputs, refs))
            {
              isAnEquiJoin = TRUE;
              leftChildValueId  = rightChildValueId;
              rightChildValueId = leftExpr->getValueId();
            }
        }
      else // they want the old behavior
        {
          if (leftGroupAttr->isCharacteristicOutput(leftChildValueId) AND
              rightGroupAttr->isCharacteristicOutput(rightChildValueId))
            {
              isAnEquiJoin = TRUE;
            }
          else if (leftGroupAttr->isCharacteristicOutput(rightChildValueId) AND
                   rightGroupAttr->isCharacteristicOutput(leftChildValueId))
            {
              isAnEquiJoin = TRUE;
              leftChildValueId  = rightChildValueId;
              rightChildValueId = leftExpr->getValueId();
            }
        }

      // Now, check whether this is a predicate that preserves ordering
      // EX:  The following predicates are order preserving
      //          T1.A = T2.X;  T1.A = T2.X+<constant>;
      //
      //      The following predicates are not order preserving
      //          T1.A = T2.X+T2.Y;  T1.A=1/T2.X;  T1.A= ABS(T2.X);
      if (leftExpr->isOrderPreserving() && rightExpr->isOrderPreserving())
	isOrderPreserving = TRUE;
      else
	isOrderPreserving = FALSE;
    } // endif is an "="
  else if (getOperatorType() == ITM_VEG_PREDICATE)
    {
      // Check whether this VEG Predicate is a true equi-join predicate.
      // If the VEG contains a constant, then we know this is not
      // a "true" join predicate.  Also, check to ensure whether
      // both children deliver the VEGReference that belong to this VEG.

      VEG * predVEG = ((VEGPredicate *)this)->getVEG();
      const ValueIdSet & VEGGroup = predVEG->getAllValues();

      NABoolean containsConstant = VEGGroup.referencesAConstExpr();

      if (containsConstant)
	return FALSE;

      ValueId vegRefId = predVEG->getVEGReference()->getValueId();

      // If this is a left join or a anti-semi the right may be
      // producing a a VEGRef that is contained in the VEGRef
      // leftChildValueId
      leftChildValueId = NULL_VALUE_ID;
      if (leftGroupAttr->isCharacteristicOutput(vegRefId))
        {
	  leftChildValueId = vegRefId;
        }
      else
        {
          ValueIdSet referencedOutputs;
          const NABoolean doNotLookInsideVegReferences = TRUE ;

          // Pass the parameter to accumulateReferencedValues to
          // indicate that we should not dig inside embedded vegrefs,
          // because in some cases a VegRef can contain itself, and
          // then we can get stuck in an infinite loop.
          //
          //should add additional comments here explaining
          // in more detail why we don't want to look inside embedded
          // veg ref's.  And we should all probably look for cases
          // where we *DO* look inside veg ref's when we shouldn't ...
          referencedOutputs.accumulateReferencedValues(
            leftGroupAttr->getCharacteristicOutputs(),   /* IN */
            VEGGroup,                                    /* IN */
            doNotLookInsideVegReferences);               /* IN */
          if ( referencedOutputs.entries() > 0 )
            referencedOutputs.getFirst(leftChildValueId);
        }

      rightChildValueId = NULL_VALUE_ID;
      if (rightGroupAttr->isCharacteristicOutput(vegRefId))
        {
          rightChildValueId = vegRefId;
        }
      else
        {
          ValueIdSet referencedOutputs;
          const NABoolean doNotLookInsideVegReferences = TRUE ;

          referencedOutputs.accumulateReferencedValues(
            rightGroupAttr->getCharacteristicOutputs(),  /* IN */
            VEGGroup,                                    /* IN */
            doNotLookInsideVegReferences);               /* IN */
          if ( referencedOutputs.entries() > 0 )
            referencedOutputs.getFirst(rightChildValueId);
        }

      if (leftChildValueId AND rightChildValueId)
	isAnEquiJoin = TRUE;

      isOrderPreserving = TRUE;

    } // endif is a VEGPredicate

  return isAnEquiJoin;

} // ItemExpr::isAnEquiJoinPredicate


// -----------------------------------------------------------------------
// ItemExpr::isANestedJoinPredicate
// ABSTRACT:
// Use this method to figure out whether a predicate in an
// operator under a nested join is a join predicate. It returns
// TRUE if it is, FALSE otherwise.
//
// A two-operand predicate qualifies when neither operand evaluates to
// a (non-strict) constant and one operand references a value from the
// operator's inputs while the other references a value from the
// operator's own values.  A VEG predicate qualifies when its VEG does
// not reference a constant and references a value from both sets.
// Predicates of any other arity never qualify.
//
// INPUTS:
//    const ValueIdSet& inputValues: the characteristic inputs
//      of the operator
//    const ValueIdSet& operatorValues: the values available
//      to the operator (i.e. if the operator is a DP2Scan,
//      they would contain the columns of the table or index
//      being scanned).
//
// The expression must be a predicate; this is enforced with CMPASSERT.
// -----------------------------------------------------------------------

NABoolean
ItemExpr::isANestedJoinPredicate (
     const ValueIdSet& inputValues,
     const ValueIdSet& operatorValues) const
{
  // This expression MUST be a predicate expression.
  CMPASSERT(isAPredicate());

  const Int32 arity = getArity(); // kept in a local for debugging

  if (arity == 2)
    {
      // a join predicate of the form col1 op col2 or similar
      const ItemExpr *leftExpr  = child(0);
      const ItemExpr *rightExpr = child(1);

      // Neither operand may be a non-strict constant.  A strict
      // constant is something like cos(1), whereas cos(?p) can be
      // considered a constant in the non-strict definition since it
      // remains constant for a given execution of a query.
      if (leftExpr->doesExprEvaluateToConstant(FALSE) OR
          rightExpr->doesExprEvaluateToConstant(FALSE))
        return FALSE;

      // We need to explode the values because they may contain
      // references and "referencesOneValueFrom" does not explode
      // references.
      ValueIdSet iv, ov;
      iv.replaceVEGExpressionsAndCopy(inputValues);
      ov.replaceVEGExpressionsAndCopy(operatorValues);

      // One operand must be in the operator's inputs and the other in
      // the operator's values, in either order.
      if (leftExpr->referencesOneValueFrom(iv) AND
          rightExpr->referencesOneValueFrom(ov))
        return TRUE;

      if (rightExpr->referencesOneValueFrom(iv) AND
          leftExpr->referencesOneValueFrom(ov))
        return TRUE;

      return FALSE;
    }

  if (arity == 0 AND getOperatorType() == ITM_VEG_PREDICATE)
    {
      // Check whether this VEG Predicate is a true equi-join predicate.
      // If the VEG contains a constant, then we know this is not
      // a "true" join predicate.  Also check that the VEG has a member
      // in both the inputs and the operator's values.
      const VEG * predVEG = ((VEGPredicate *)this)->getVEG();
      const ValueIdSet & VEGGroupForPred = predVEG->getAllValues();

      if (VEGGroupForPred.referencesAConstExpr())
        return FALSE;

      // Explode the values for the same reason as above.
      ValueIdSet iv, ov;
      iv.replaceVEGExpressionsAndCopy(inputValues);
      ov.replaceVEGExpressionsAndCopy(operatorValues);

      if (VEGGroupForPred.referencesOneValueFromTheSet(iv) AND
          VEGGroupForPred.referencesOneValueFromTheSet(ov))
        return TRUE;

      return FALSE;
    }

  // Any other arity (or a leaf that is not a VEG predicate).
  return FALSE;
} // ItemExpr::isANestedJoinPredicate (...)

// -----------------------------------------------------------------------
// ItemExpr::accumulateConstExprs
// Collects the leaf predicates of this expression (getLeafPredicates)
// and adds to constExprs the value id of every leaf predicate that has
// at least one child for which doesExprEvaluateToConstant(FALSE,TRUE)
// holds.  Existing members of constExprs are kept.
// -----------------------------------------------------------------------
void ItemExpr::accumulateConstExprs(ValueIdSet & constExprs)
{
  ValueIdSet leafPreds;
  getLeafPredicates(leafPreds);

  for (ValueId predId = leafPreds.init();
       leafPreds.next(predId);
       leafPreds.advance(predId))
    {
      ItemExpr *pred = predId.getItemExpr();

      // Scan the operands until one evaluates to a constant.
      Lng32 operand = 0;
      while (operand < (Lng32)pred->getArity() AND
             NOT pred->child(operand)->doesExprEvaluateToConstant(FALSE,TRUE))
        operand++;

      // Stopped before the end: a constant operand was found.
      if (operand < (Lng32)pred->getArity())
        constExprs += pred->getValueId();
    }
}

// -----------------------------------------------------------------------
// ItemExpr::isOrderPreserving
// Base-class answer: always TRUE.  BiArith, Function and BiLogic
// provide their own versions later in this file, each returning FALSE.
// isAnEquiJoinPredicate() consults this on both operands of an "="
// predicate.
// -----------------------------------------------------------------------
NABoolean ItemExpr::isOrderPreserving() const
{
  return TRUE;
}

// -----------------------------------------------------------------------
// ItemExpr::applyDefaultPred
// This is the default version of a virtual method that derived classes
// of ItemExpr redefine.  The method is used to apply the selectivity of
// default predicates on histograms.
//
// The default version applies nothing.  When no maxSelectivity pointer
// is passed and the expression has at least one child, the first child
// is checked for statistics.  Finding statistics here is considered an
// unexpected condition: an assertion is raised in debug mode, and in
// release mode TRUE ("already applied") is returned.  In every other
// case the method returns FALSE.
//
// exprOpCode, predValueId and globalPredicate are not used by this
// version.
// -----------------------------------------------------------------------
NABoolean ItemExpr::applyDefaultPred(ColStatDescList & histograms,
                                     OperatorTypeEnum exprOpCode,
                                     ValueId predValueId,
                                     NABoolean & globalPredicate,
                                     CostScalar *maxSelectivity)
{
  // Nothing to check when a max selectivity is being computed ...
  if (maxSelectivity != NULL)
    return FALSE;

  // ... or when there is no child to look at.
  if (getArity() <= 0)
    return FALSE;

  ValueIdSet operandLeafValues;
  CollIndex histIndex;

  if (NOT child(0)->checkForStats(histograms, histIndex, operandLeafValues))
    return FALSE;

  DCMPASSERT( FALSE ); // unexpected condition!
  return TRUE;
}

// -----------------------------------------------------------------------
// ItemExpr::applyUnSuppDefaultPred
// This method is used to apply selectivity of unsupported default
// predicates on histograms.
//
// The histogram is located through the first child of the predicate
// (or, for ITM_NOT, through the first child of the negated predicate).
// If one is found and predValueId has not been applied to it yet, a
// default selectivity is applied and the predicate is recorded on the
// histogram.
//
// OUTPUT:
//   globalPredicate - set to FALSE once a histogram has been found;
//                     otherwise left untouched.
//
// Returns TRUE only when the predicate had already been applied to the
// histogram; FALSE in every other case.
// -----------------------------------------------------------------------
NABoolean ItemExpr::applyUnSuppDefaultPred(ColStatDescList & histograms,
                                           ValueId predValueId,
                                           NABoolean & globalPredicate)
{
  CostScalar selToApply = csOne;

  if (getArity() <= 0)
    {
      // No operand to look up statistics for.  The value computed here
      // is not used any further.
      selToApply = this->defaultSel();
      return FALSE;
    }

  // For NOT <pred>, examine <pred> and its first operand instead of
  // the NOT itself.
  // NOTE(review): this assumes the negated predicate has at least one
  // child - confirm against callers.
  const NABoolean negated = (getOperatorType() == ITM_NOT);
  ItemExpr * probedPred   = this;
  ItemExpr * statsOperand = child(0);

  if (negated)
    {
      probedPred   = statsOperand;
      statsOperand = probedPred->child(0);
    }

  const OperatorTypeEnum probedOp = probedPred->getOperatorType();

  // Is the predicate of the form "<col> <op> <expression>"?
  // Earlier logic places stand-alone columns on the left, so look
  // for a histogram associated with the left-hand operand.
  ValueIdSet operandLeafValues;
  CollIndex histIndex;

  if (NOT statsOperand->checkForStats(histograms, histIndex, operandLeafValues))
    return FALSE;

  globalPredicate = FALSE;   // not a 'global' predicate
  ColStatDescSharedPtr statDesc = (histograms)[histIndex];

  if (statDesc->isPredicateApplied( predValueId ))
    return TRUE;

  // First time for this histogram.
  CostScalar rowcountBefore = statDesc->getColStats()->getRowcount();

  if ( (probedOp == ITM_LIKE) &&
       ((Like *)probedPred)->isPatternAStringLiteral() )
    {
      // A LIKE whose pattern has no wild cards but that, for some
      // reason, could not be transformed into an equality predicate:
      // use a selectivity of 1/UEC.
      ColStatsSharedPtr colStats = statDesc->getColStats();

      if (colStats->isFakeHistogram())
        {
          selToApply = 1.0/(CURRSTMT_OPTDEFAULTS->defNoStatsUec()) ;
        }
      else
        {
          CostScalar uec = colStats->getTotalUec();

          // Guard against a division by zero.
          if (uec == csZero)
            uec = 1;

          selToApply = 1.0/uec.value();
          selToApply.maxCsOne();
        }

      // NOT LIKE gets the complement.
      if (negated)
        selToApply = 1 - selToApply.value();
    }
  else
    {
      // Everything else goes the usual way.
      selToApply = this->defaultSel();
    }

  statDesc->addToAppliedPreds( predValueId );
  statDesc->applySel( selToApply );

  // If user specified selectivity for this predicate, we need to make
  // adjustment in reduction to reflect that.
  statDesc->applySelIfSpecifiedViaHint(this, rowcountBefore);

  return FALSE;
}

// -----------------------------------------------------------------------
// ItemExpr::checkForStats
// Looks up the histogram entry that belongs to this expression.
//
// The expression returned by getLeafValueIfUseStats() is used for the
// lookup.  leafValues is cleared and then filled with what findAll()
// collects for ITM_BASECOLUMN under that expression.
//
// OUTPUTS:
//   columnIndex - filled in by getColStatDescIndexForColumn()
//   leafValues  - see above
//
// Returns the result of getColStatDescIndexForColumn().
// -----------------------------------------------------------------------
NABoolean ItemExpr::checkForStats(ColStatDescList & histograms,
                                  CollIndex & columnIndex,
                                  ValueIdSet & leafValues)
{
  ItemExpr * statsExpr = this->getLeafValueIfUseStats();

  leafValues.clear();
  statsExpr->findAll(ITM_BASECOLUMN, leafValues, TRUE, TRUE);

  const NABoolean statsFound =
    histograms.getColStatDescIndexForColumn(columnIndex, statsExpr->getValueId());
  return statsFound;
}

// -----------------------------------------------------------------------
// ItemExpr::calculateMinMaxUecs  (default case)
// Host variables, dynamic parameters and cache parameters get a
// minimum and maximum UEC of 1 and the method returns TRUE.  For every
// other operator type both outputs are set to csMinusOne and the
// method returns FALSE.  histograms is not consulted by this version.
// -----------------------------------------------------------------------
NABoolean ItemExpr::calculateMinMaxUecs(ColStatDescList & histograms,
                                        CostScalar & minUec,
                                        CostScalar & maxUec)
{
  const OperatorTypeEnum opType = getOperatorType();

  const NABoolean isParameterLike = (opType == ITM_HOSTVAR   OR
                                     opType == ITM_DYN_PARAM OR
                                     opType == ITM_CACHE_PARAM);

  if (isParameterLike)
    {
      minUec = maxUec = 1;
      return TRUE;
    }

  minUec = maxUec = csMinusOne;
  return FALSE;
}

// -----------------------------------------------------------------------
// ItemExpr::calculateUecs
// Derives UEC bounds for this expression from its histogram.
//
// When checkForStats() finds a histogram:
//   minUec - the total UEC of the histogram
//   maxUec - the rowcount of the histogram
// and TRUE is returned.  Otherwise FALSE is returned and minUec and
// maxUec are left untouched.
// -----------------------------------------------------------------------
NABoolean ItemExpr::calculateUecs(ColStatDescList & histograms,
                                  CostScalar & minUec,
                                  CostScalar & maxUec)
{
  CollIndex histIndex;
  ValueIdSet leafValues;

  if (NOT checkForStats(histograms, histIndex, leafValues))
    return FALSE;

  ColStatDescSharedPtr statDesc = (histograms)[histIndex];
  minUec = statDesc->getColStats()->getTotalUec();
  maxUec = statDesc->getColStats()->getRowcount();
  return TRUE;
}

// -----------------------------------------------------------------------
// ItemExpr::resetRealBigNumFlag
// Walks the expression tree rooted at node, children first.  For every
// ITM_CAST node whose type is numeric, is a BigNum and reports
// isARealBigNum(), resetRealBigNum() is called on that type.
// node must not be NULL.
// -----------------------------------------------------------------------
void ItemExpr::resetRealBigNumFlag(ItemExpr *node)
{
  // Handle the subtrees before the node itself.
  for (Int32 childIx = 0; childIx < node->getArity(); childIx++)
    resetRealBigNumFlag(node->child(childIx));

  // Only casts to a numeric type are of interest.
  if (node->getOperatorType() != ITM_CAST)
    return;

  if (((Cast *)node)->getType()->getTypeQualifier() != NA_NUMERIC_TYPE)
    return;

  NumericType *numType = (NumericType *)((Cast *)node)->getType();
  if (NOT numType->isBigNum())
    return;

  SQLBigNum *bigNumType = (SQLBigNum *)numType;
  if (bigNumType->isARealBigNum())
    bigNumType->resetRealBigNum();
}

// -----------------------------------------------------------------------
// VEGReference::calculateMinMaxUecs
// Forwards to calculateUecs() with the same arguments and returns its
// result unchanged.
// -----------------------------------------------------------------------
NABoolean VEGReference::calculateMinMaxUecs(ColStatDescList & histograms,
					    CostScalar & minUec,
					    CostScalar & maxUec)
{
  return calculateUecs(histograms, minUec, maxUec);
}

// -----------------------------------------------------------------------
// BaseColumn::calculateMinMaxUecs
// Forwards to calculateUecs() with the same arguments and returns its
// result unchanged.
// -----------------------------------------------------------------------
NABoolean BaseColumn::calculateMinMaxUecs(ColStatDescList & histograms,
					  CostScalar & minUec,
					  CostScalar & maxUec)
{
  return calculateUecs(histograms, minUec, maxUec);
}

// -----------------------------------------------------------------------
//  Methods for class BiArith
//
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// estimateDateTruncUec (file-local helper)
// UEC estimate for a DATE_TRUNC-style function applied to
// truncatedOperand.
//
// The histogram of the operand is looked up; the time it spans
// (max value - min value, as doubles) is reduced by secondsAbsorbed
// (never below zero) and the total UEC of the histogram is scaled by
// the ratio of the reduced span to the original span.  The result is
// stored in both minUec and maxUec.
//
// If the span is not positive (for instance when the minimum and the
// maximum of the histogram coincide) the ratio is undefined; computing
// it would divide by zero and yield NaN.  In that case the total UEC is
// used unscaled.
//
// Returns FALSE, leaving minUec and maxUec untouched, when no histogram
// is found for the operand; TRUE otherwise.
// -----------------------------------------------------------------------
static NABoolean estimateDateTruncUec(ItemExpr * truncatedOperand,
                                      ColStatDescList & histograms,
                                      const double secondsAbsorbed,
                                      CostScalar & minUec,
                                      CostScalar & maxUec)
{
  CollIndex colIndex = 0;
  ValueIdSet leafValues;

  if (NOT truncatedOperand->checkForStats(histograms, colIndex, leafValues))
    return FALSE;

  ColStatDescSharedPtr statDesc = (histograms)[colIndex];
  ColStatsSharedPtr colStats = statDesc->getColStats();

  const double timeEncompassedInHistogram =
    (colStats->getMaxValue().getDblValue() - colStats->getMinValue().getDblValue());

  // Fraction of the UEC that survives the truncation.
  double survivingFraction = 1.0;
  if (timeEncompassedInHistogram > 0)
    {
      const double timeEncompassedAfterFunctionApplied =
        MAXOF((timeEncompassedInHistogram - secondsAbsorbed), 0.0);
      survivingFraction =
        timeEncompassedAfterFunctionApplied / timeEncompassedInHistogram;
    }

  minUec = maxUec = (colStats->getTotalUec() * survivingFraction);
  return TRUE;
}

// -----------------------------------------------------------------------
// BiArith::calculateMinMaxUecs
// Certain functions such as DAYOFYEAR and WEEK get converted to BiArith
// objects; they are recognised through origOpType() and handled here:
//   ITM_DAYOFYEAR         - UEC is 366
//   ITM_WEEK              - UEC is 54
//   ITM_DATE_TRUNC_DAY    - histogram UEC of the first child, scaled
//   ITM_DATE_TRUNC_MONTH    down as described for
//   ITM_DATE_TRUNC_YEAR     estimateDateTruncUec()
// For any other arithmetic operator the UECs of both children are
// calculated and the larger minimum / larger maximum are returned.
//
// Returns TRUE when minUec and maxUec were set, FALSE otherwise.
// -----------------------------------------------------------------------
NABoolean BiArith::calculateMinMaxUecs(ColStatDescList & histograms,
                                       CostScalar & minUec,
                                       CostScalar & maxUec)
{
  const double secondsPerDay = 86400.0;

  if (origOpType() == ITM_DAYOFYEAR)
    {
      minUec = maxUec = 366;
      return TRUE;
    }

  if (origOpType() == ITM_WEEK)
    {
      minUec = maxUec = 54;
      return TRUE;
    }

  if (origOpType() == ITM_DATE_TRUNC_DAY)
    {
      // Reduce the UEC by the equivalent of 12 hours worth of time.
      return estimateDateTruncUec(child(0), histograms,
                                  secondsPerDay / 2,
                                  minUec, maxUec);
    }

  if (origOpType() == ITM_DATE_TRUNC_MONTH)
    {
      // Reduce the UEC by the equivalent of 15 days worth of time.
      return estimateDateTruncUec(child(0), histograms,
                                  secondsPerDay * 15,
                                  minUec, maxUec);
    }

  if (origOpType() == ITM_DATE_TRUNC_YEAR)
    {
      // Reduce the UEC by the equivalent of 6 months (of 30 days)
      // worth of time.
      return estimateDateTruncUec(child(0), histograms,
                                  secondsPerDay * 30 * 6,
                                  minUec, maxUec);
    }

  // Calculate the UEC of both the children and return the MAX of UEC
  // of children as the UEC for the biarithmetic operator.
  CostScalar firstMinUec, secondMinUec, firstMaxUec, secondMaxUec;
  if (child(0)->calculateMinMaxUecs(histograms, firstMinUec, firstMaxUec) &&
      child(1)->calculateMinMaxUecs(histograms, secondMinUec, secondMaxUec))
    {
      minUec = MAXOF(firstMinUec, secondMinUec);
      maxUec = MAXOF(firstMaxUec, secondMaxUec);
      return TRUE;
    }

  return FALSE;
}

// -----------------------------------------------------------------------
// BiArith::isOrderPreserving
// Always FALSE: an arithmetic expression is currently never reported
// as order preserving, even in cases where it would be (see the
// future-work note below).
// -----------------------------------------------------------------------
NABoolean BiArith::isOrderPreserving() const
{
  // Future Work:  By analyzing the predicate further, can determine
  // that certain biArith predicates ARE order preserving.
  //
  // EX:   T1.A = T2.X + <constant>;    -> order preserving
  //       T1.A = T2.X + T2.Y;          -> not order preserving
  // For now, we just return false.

  return FALSE;
}

// -----------------------------------------------------------------------
//  Methods for class Function
//
// -----------------------------------------------------------------------
// Function::isOrderPreserving
// Always FALSE; no per-function analysis is attempted here.
NABoolean Function::isOrderPreserving() const
{
  // Without further info, cannot determine whether a user-defined
  // function is order preserving
  return FALSE;
}

// -----------------------------------------------------------------------
//  Methods for class UnLogic
//
// ITM_NOT, ITM_IS_TRUE, ITM_IS_FALSE, ITM_IS_NULL, ITM_IS_NOT_NULL,
// ITM_IS_UNKNOWN, ITM_IS_NOT_UNKNOWN
// -----------------------------------------------------------------------

double UnLogic::defaultSel()
{
  switch (getOperatorType())
    {
    case ITM_IS_NULL:
    case ITM_IS_UNKNOWN: // was 0.01
      return (CostPrimitives::getBasicCostFactor(HIST_DEFAULT_SEL_FOR_IS_NULL)) ;

    case ITM_IS_NOT_NULL:
    case ITM_IS_NOT_UNKNOWN:  // was 0.99
      return (1.0 - CostPrimitives::getBasicCostFactor(HIST_DEFAULT_SEL_FOR_IS_NULL)) ;

    case ITM_NOT:
      // ITM_NOT should actually be returning selectivity equal to 
      // 1 - default childSelectivity in all cases. But in some cases 
      // including aggregate expressions, we use default defaultSel() 
      // method to compute selectivity. This is 1.0 in all cases except when
      // the expression is ITM_RETURN_FALSE. Because of this the selectivity
      // of NOT returned for most aggregate function is 0. This needs to be fixed.
      // We have created a case to follow this up: Sol: 10-050721-0038

      if (child(0)->getOperatorType() == ITM_LIKE)
	return (1 - child(0)->defaultSel());
      else
        return (CostPrimitives::getBasicCostFactor(HIST_DEFAULT_SEL_FOR_BOOLEAN)) ;

    default: // ITM_IS_TRUE, ITM_IS_FALSE  // was 0.3333
      return (CostPrimitives::getBasicCostFactor(HIST_DEFAULT_SEL_FOR_BOOLEAN)) ;
    }
}

// -----------------------------------------------------------------------
// UnLogic::synthSupportedOp
// Returns TRUE for IS NULL, IS NOT NULL, IS UNKNOWN and IS NOT UNKNOWN.
// Returns FALSE for NOT, IS TRUE, IS FALSE and any other operator type.
// -----------------------------------------------------------------------
NABoolean UnLogic::synthSupportedOp() const
{
  switch (getOperatorType())
    {
    case ITM_IS_NULL:
    case ITM_IS_NOT_NULL:
    case ITM_IS_UNKNOWN:
    case ITM_IS_NOT_UNKNOWN:
      return TRUE;

    case ITM_NOT:
    case ITM_IS_TRUE:
    case ITM_IS_FALSE:
    default: // ??
      return FALSE;
    }
}

// -----------------------------------------------------------------------
// UnLogic::applyDefaultPred
// Applies the default selectivity of IS [NOT] NULL / IS [NOT] UNKNOWN
// to the histogram found for the operand.
//
// - When maxSelectivity is passed, nothing is done and TRUE is returned.
// - When no histogram is found for the operand, FALSE is returned.
// - For any other operator with a histogram the condition is
//   unexpected: an assertion is raised in debug mode and TRUE is
//   returned.
// - Otherwise globalPredicate is set to FALSE.  If predValueId has
//   already been applied to the histogram TRUE is returned; if not, the
//   selectivity is applied, the predicate is recorded on the histogram
//   and FALSE is returned.
//
// exprOpCode is not used by this version.
// -----------------------------------------------------------------------
NABoolean UnLogic::applyDefaultPred(ColStatDescList & histograms,
                                    OperatorTypeEnum exprOpCode,
                                    ValueId predValueId,
                                    NABoolean & globalPredicate,
                                    CostScalar *maxSelectivity)
{
  // maxSelectivity computation is done
  if (maxSelectivity)
    return TRUE;

  // histIndex receives the position of the histogram whose statistics
  // will be used for computing selectivity.  In case the operand
  // contains more than one column, it would be the position of the
  // histogram with max UEC amongst them.
  CollIndex histIndex;

  // The leaf values of the operand
  ValueIdSet operandLeafValues;

  if (NOT child(0)->checkForStats(histograms, histIndex, operandLeafValues))
    return FALSE;

  OperatorTypeEnum op = getOperatorType();

  const NABoolean isNullnessTest = (op == ITM_IS_NULL     OR
                                    op == ITM_IS_NOT_NULL OR
                                    op == ITM_IS_UNKNOWN  OR
                                    op == ITM_IS_NOT_UNKNOWN);
  if (NOT isNullnessTest)
    {
      DCMPASSERT( FALSE ); // unexpected condition!
      return TRUE;
    }

  globalPredicate = FALSE;   // not a 'global' predicate
  ColStatDescSharedPtr statDesc = (histograms)[histIndex];

  if (statDesc->isPredicateApplied( predValueId ))
    return TRUE;

  // First time for this histogram.  A similar predicate that has
  // already been applied leaves the selectivity at one.
  CostScalar selToApply = csOne;
  if (NOT statDesc->isSimilarPredicateApplied( op ))
    selToApply = this->defaultSel();

  CostScalar rowcountBefore = statDesc->getColStats()->getRowcount();

  statDesc->addToAppliedPreds( predValueId );
  statDesc->applySel( selToApply );

  // If user specified selectivity for this predicate, we need to make
  // adjustment in reduction to reflect that.
  statDesc->applySelIfSpecifiedViaHint(this, rowcountBefore);

  return FALSE;
}

// -----------------------------------------------------------------------
//  Methods for class BiLogic
// -----------------------------------------------------------------------

// BiLogic::isOrderPreserving
// Always FALSE.
NABoolean BiLogic::isOrderPreserving() const
{
  return FALSE;
}

// BiLogic::synthSupportedOp
// Always TRUE, whatever the operator type.
// NOTE(review): presumably reports whether histogram synthesis supports
// this operator - confirm against the callers.
NABoolean BiLogic::synthSupportedOp() const
{
  return TRUE;
}

// -----------------------------------------------------------------------
// BiLogic::applyDefaultPred
// AND / OR predicates are not applied to a histogram here.
//
// - When maxSelectivity is passed, TRUE is returned right away.
// - When no histogram is found for the first child, FALSE is returned.
// - Otherwise TRUE ("already applied") is returned, so that nothing is
//   done with this predicate at this point.  An operator other than
//   AND / OR is unexpected and raises an assertion in debug mode.
//
// exprOpCode, predValueId and globalPredicate are not used by this
// version.
// -----------------------------------------------------------------------
NABoolean BiLogic::applyDefaultPred(ColStatDescList & histograms,
                                    OperatorTypeEnum exprOpCode,
                                    ValueId predValueId,
                                    NABoolean & globalPredicate,
                                    CostScalar *maxSelectivity)
{
  // maxSelectivity computation is done
  if (maxSelectivity)
    return TRUE;

  // Filled in by checkForStats; only the result of the lookup matters
  // to this method.
  CollIndex leftColIndex;
  ValueIdSet leftLeafValues;

  if (NOT child(0)->checkForStats(histograms, leftColIndex, leftLeafValues))
    return FALSE;

  const OperatorTypeEnum op = getOperatorType();
  if ( ( op != ITM_OR ) && ( op != ITM_AND ) )
    {
      DCMPASSERT( FALSE ); // unexpected condition!
    }

  // Don't do anything with this predicate right here, right now.
  return TRUE;
}

// -----------------------------------------------------------------------
//  Methods for class BiRelat
//   ITM_EQUAL, ITM_NOT_EQUAL, ITM_LESS, ITM_LESS_EQUAL,
//   ITM_GREATER, ITM_GREATER_EQ
// -----------------------------------------------------------------------
double BiRelat::defaultSel()
{

  switch (getOperatorType())
    {
      case ITM_EQUAL:
	{
//      return (CostPrimitives::getBasicCostFactor(HIST_DEFAULT_SEL_FOR_PRED_EQUAL)) ;
	  double selectivityForPredEqual = (1.0/(CURRSTMT_OPTDEFAULTS->defNoStatsUec()) );
	  return selectivityForPredEqual;
	}

      case ITM_NOT_EQUAL:
	{
//      return (1.0 - CostPrimitives::getBasicCostFactor(HIST_DEFAULT_SEL_FOR_PRED_EQUAL)) ;
	  double selectivityForPredEqual = (1.0/(CURRSTMT_OPTDEFAULTS->defNoStatsUec()) );
	  if (selectivityForPredEqual == 1.0)
	    return MIN_SELECTIVITY;
	  else
	    return (1.0 - selectivityForPredEqual);
	}

      default: // ITM_LESS, ITM_LESS_EQUAL, ITM_GREATER, ITM_GREATER_EQ
	return CURRSTMT_OPTDEFAULTS->defSelForRangePred() ;
    }
}

// BiRelat is always reported as supported: this method
// unconditionally answers TRUE.
NABoolean BiRelat::synthSupportedOp() const
{
  const NABoolean supported = TRUE;
  return supported;
}

// -----------------------------------------------------------------------
// BiRelat::applyDefaultPred
//
// Applies a default selectivity for this comparison predicate to the
// histogram (in 'histograms') that belongs to the left child, dispatching
// to applyEquiJoinExpr / applyLocalPredExpr / applyRangePredExpr
// depending on the operator and on how many columns are involved.
//
// Parameters:
//   histograms      - histogram list to search and to modify
//   exprOpCode      - operator code of the relational expression the
//                     predicate is evaluated for (compared against
//                     REL_SCAN below)
//   predValueId     - ValueId under which the predicate is recorded as
//                     applied
//   globalPredicate - set to FALSE on every path that handles (or finds
//                     already handled) the predicate; otherwise untouched
//   maxSelectivity  - when non-NULL, only the equal / not-equal local
//                     predicate path (applyLocalPredExpr) is executed;
//                     the equi-join and range paths are skipped
//
// Returns:
//   TRUE  if the predicate was already recorded on the left histogram,
//         or if the operator is not one of the six comparison operators;
//   FALSE on all other paths, including those that apply a selectivity.
// -----------------------------------------------------------------------
NABoolean BiRelat::applyDefaultPred(ColStatDescList & histograms,
			                         OperatorTypeEnum exprOpCode,
						 ValueId predValueId,
                                    NABoolean & globalPredicate,
                                    CostScalar *maxSelectivity)
{
  // NOTE(review): rightUec is initialized here but never read in this method.
  CostScalar defaultSel = csOne, leftUec = csMinusOne, rightUec = csMinusOne, oldRowcount = csOne;
  NABoolean statsExist = FALSE;
  NABoolean alreadyApplied  = FALSE;
  NABoolean rhsStatsExist = FALSE;

  // leftColIndex contains the position of the left histogram whose statistics 
  // will be used for computing selectivity. 
  // In case the left child contains more than one column, 
  // it would be the position of the histogram with max UEC amongst the left child
  CollIndex leftColIndex;
  CollIndex rightColIndex;

  // following two sets contain the leaf values of the respective children
  ValueIdSet leftLeafValues;
  ValueIdSet rightLeafValues;

  // This boolean will be set to TRUE if it is an equality predicate with more
  // than one column involved and COMP_BOOL_40 is ON.
  // When counting the number of columns, it takes a combined set of the
  // columns from the left and the right children
  NABoolean equiJoinWithExpr = FALSE;

  OperatorTypeEnum op = getOperatorType();

  ItemExpr *lhs = child(0);
  ItemExpr *rhs = child(1);

  if(( op == ITM_EQUAL ) && (CmpCommon::getDefault(COMP_BOOL_40) == DF_ON))
  {
    lhs->findAll(ITM_VEG_REFERENCE, leftLeafValues, TRUE, TRUE);
    rhs->findAll(ITM_VEG_REFERENCE, rightLeafValues, TRUE, TRUE);

    // More than one VEG reference overall, and the two sides do not refer
    // to exactly the same set: candidate for the equi-join-with-expression
    // treatment.
    if((rightLeafValues.entries() + leftLeafValues.entries() > 1) && (rightLeafValues != leftLeafValues))
    {
      statsExist = histograms.getColStatDescIndexForColWithMaxUec(leftColIndex, leftLeafValues);
      if(statsExist)
      {
	// For a scan the equi-join treatment is used only when COMP_BOOL_74 is OFF.
	if((exprOpCode != REL_SCAN) || (CmpCommon::getDefault(COMP_BOOL_74) == DF_OFF) )
	  equiJoinWithExpr = TRUE;

	if (rightLeafValues.entries() > 0)
	  rhsStatsExist = histograms.getColStatDescIndexForColWithMaxUec(rightColIndex, rightLeafValues);
      }
    }
    else
    {
      // Here local predicates with complex expression on one side are handled.
      // If the complex expression is MOD, SUBSTRING or other functions that cannot
      // be handled, then global default selectivity of 0.5 is used.
      statsExist = lhs->checkForStats(histograms, leftColIndex, leftLeafValues);
      rhsStatsExist = rhs->checkForStats(histograms, rightColIndex, rightLeafValues);
    }
  }
  else
  {
    statsExist = lhs->checkForStats(histograms, leftColIndex, leftLeafValues);
    rhsStatsExist = rhs->checkForStats(histograms, rightColIndex, rightLeafValues);
  }

  if (statsExist)
  {
    ColStatDescSharedPtr leftStatDesc = (histograms)[leftColIndex];

    // Never apply the same predicate twice to the same histogram.
    if (leftStatDesc->isPredicateApplied(predValueId))
    {
      globalPredicate = FALSE;
      return TRUE; //alreadyApplied
    }

    // Remember the rowcount before any reduction; it is handed to
    // applySelIfSpecifiedViaHint at the end.
    oldRowcount = leftStatDesc->getColStats()->getRowcount();

    if ( ( op == ITM_EQUAL ) || ( op == ITM_NOT_EQUAL ) )
    {
      // if the right side has any column, get the histograms for the right child
      // (rightStatDesc stays default-constructed otherwise)
      ColStatDescSharedPtr rightStatDesc;
      if(rhsStatsExist)
	rightStatDesc = (histograms)[rightColIndex];

      if (equiJoinWithExpr)
      {
       // Nothing is done on this path while maxSelectivity is being computed.
       if (maxSelectivity == NULL) {
	globalPredicate = FALSE;

	// if the left side represents a simple expression, use UEC from left side for calculating selectivity
	if(isSimpleComplexPredInvolved())
	{
	  leftUec = leftStatDesc->getColStats()->getTotalUec().getValue();
	  defaultSel = 1/leftUec.minCsOne().getValue();
	  // NOTE(review): op is always ITM_EQUAL when equiJoinWithExpr is TRUE
	  // (it is only set under the ITM_EQUAL test above), so this branch
	  // looks unreachable - confirm.
	  if( op == ITM_NOT_EQUAL)
	    defaultSel = 1 - defaultSel.getValue();
	  leftStatDesc->applySel(defaultSel);
	}
	else
	  applyEquiJoinExpr(leftStatDesc, rightStatDesc, histograms);

	// Record the predicate as applied for the columns of both sides.
	ValueIdSet columnWithPreds(leftLeafValues);
	columnWithPreds.insert(rightLeafValues);
	histograms.addToAppliedPredsOfAllCSDs( columnWithPreds, predValueId );
       } // maxSelectivity == NULL
      }
      else
      {
	// The following is for Gen Sol: 10-090316-0026: See genesis for 
	// details. 
	// If there are multiple columns on the left side then 
	// we need to make sure that the columns are part of a VEG.
	if (leftLeafValues.entries() > 1)
	{
	  statsExist = FALSE;
	  
	  // Make sure the columns in the VEG do not belong to the same table
	  // NOTE(review): tableDescs is dereferenced without a NULL check;
	  // presumably getAllTables() never returns NULL - confirm.
	  SET(TableDesc *) * tableDescs = NULL;
	  tableDescs = leftLeafValues.getAllTables();
	  if(tableDescs->entries() > 1)
	  {
	    leftLeafValues.clear();
	    lhs->findAll(ITM_VEG_REFERENCE, leftLeafValues, TRUE, TRUE);
	    if (leftLeafValues.entries() == 1)
	    {
	      // NOTE(review): leftColIndex is used right away without
	      // re-checking statsExist; if the lookup fails, the index is
	      // presumably left at its previous value - confirm.
	      statsExist = histograms.getColStatDescIndexForColWithMaxUec(leftColIndex, leftLeafValues);
	      leftStatDesc = (histograms)[leftColIndex];
	      oldRowcount = leftStatDesc->getColStats()->getRowcount();
	    }
	  }
	}

	if(statsExist)
	{
	  globalPredicate = FALSE;
	  // This is the only helper that is also called while maxSelectivity
	  // is being computed.
	  applyLocalPredExpr(leftStatDesc, rightStatDesc, exprOpCode, 
			      leftLeafValues, rightLeafValues, maxSelectivity);	
	  if (maxSelectivity == NULL)
	    leftStatDesc->addToAppliedPreds( predValueId );
	}
       } // 1b: equal, not equal
    } 
    // less, less_eq, greater, greater_eq
    else if (	 op == ITM_LESS
		  OR op == ITM_LESS_EQ
		  OR op == ITM_GREATER
		  OR op == ITM_GREATER_EQ
	    )
    {
     if (maxSelectivity == NULL) {
      globalPredicate = FALSE;
      applyRangePredExpr(leftStatDesc, leftLeafValues, rightLeafValues);
      leftStatDesc->addToAppliedPreds( predValueId );
     } // maxSelectivity == NULL
    } // less, less_eq, greater, greater_eq
    else
    {
	  DCMPASSERT( FALSE ); // unexpected condition!
	  alreadyApplied = TRUE;
    }

    // If user specified selectivity for this predicate, we need to make
    // adjustment in reduction to reflect that.
    // NOTE(review): this call is made on every path that reaches here,
    // including the ones skipped because maxSelectivity is non-NULL -
    // confirm that is intended.
    leftStatDesc->applySelIfSpecifiedViaHint(this, oldRowcount);
  }
  return alreadyApplied;
}

// Decides whether this comparison qualifies for the "simple-complex" rule:
// one side is a plain VEG reference, the other side is an expression that
// has no special UEC logic of its own, and the two sides reference base
// columns of different tables.
//
// Returns FALSE when
//   - both sides or neither side is a VEG reference,
//   - either side is a MOD,
//   - either side is a SUBSTRING whose first child is a CAST,
//   - either side is a CASE / IF-THEN-ELSE chain whose outcomes are all
//     constants or VEG references,
//   - either side references no table, or the first table of the left
//     side also appears on the right side.
// Returns TRUE otherwise.
NABoolean BiRelat::isSimpleComplexPredInvolved()
{
  ItemExpr *leftExpr  = child(0)->getLeafValueIfUseStats();
  ItemExpr *rightExpr = child(1)->getLeafValueIfUseStats();

  const OperatorTypeEnum leftOp  = leftExpr->getOperatorType();
  const OperatorTypeEnum rightOp = rightExpr->getOperatorType();

  // Exactly one of the two sides has to be a VEG reference.
  const NABoolean leftIsVegRef  = (leftOp  == ITM_VEG_REFERENCE);
  const NABoolean rightIsVegRef = (rightOp == ITM_VEG_REFERENCE);
  if ((leftIsVegRef && rightIsVegRef) || (!leftIsVegRef && !rightIsVegRef))
    return FALSE;

  // Some operators have specific logic for calculating UEC and should not
  // be considered for the simple-complex rule. Currently MOD, SUBSTRING
  // (over a CAST) and CASE are in the list. More will be added as more
  // operators require special logic.

  if ((leftOp == ITM_MOD) || (rightOp == ITM_MOD))
    return FALSE;

  if ((rightOp == ITM_SUBSTR) &&
      (rightExpr->child(0)->getOperatorType() == ITM_CAST))
    return FALSE;

  if ((leftOp == ITM_SUBSTR) &&
      (leftExpr->child(0)->getOperatorType() == ITM_CAST))
    return FALSE;

  // Locate the IF-THEN-ELSE node to inspect, if a CASE is involved. The
  // left side takes precedence over the right side. For ITM_CASE the
  // node of interest is its first child.
  NABoolean caseInvolved = FALSE;
  ItemExpr *ifNode;

  if ((leftOp == ITM_CASE) || (leftOp == ITM_IF_THEN_ELSE))
  {
    caseInvolved = TRUE;
    ifNode = leftExpr;
    if (leftOp == ITM_CASE)
      ifNode = ifNode->child(0);
  }
  else if ((rightOp == ITM_CASE) || (rightOp == ITM_IF_THEN_ELSE))
  {
    caseInvolved = TRUE;
    ifNode = rightExpr;
    if (rightOp == ITM_CASE)
      ifNode = ifNode->child(0);
  }

  if (caseInvolved)
  {
    // Walk down the chain of nested IF-THEN-ELSE nodes hanging off
    // child(2) for as long as every child(1) seen so far is a VEG
    // reference or a constant.
    OperatorTypeEnum outcomeOp = ifNode->child(1)->getOperatorType();
    NABoolean outcomesSimple =
      ((outcomeOp == ITM_VEG_REFERENCE) || (outcomeOp == ITM_CONSTANT));

    while (outcomesSimple &&
           (ifNode->child(2)->getOperatorType() == ITM_IF_THEN_ELSE))
    {
      ifNode = ifNode->child(2);
      outcomeOp = ifNode->child(1)->getOperatorType();
      outcomesSimple =
        ((outcomeOp == ITM_VEG_REFERENCE) || (outcomeOp == ITM_CONSTANT));
    }

    // Every child(1) was simple; if the final child(2) is simple too,
    // the rule does not apply.
    if (outcomesSimple)
    {
      const OperatorTypeEnum lastOp = ifNode->child(2)->getOperatorType();
      if ((lastOp == ITM_CONSTANT) || (lastOp == ITM_VEG_REFERENCE))
        return FALSE;
    }
  }

  // Finally compare the tables behind the base columns of each side.
  ValueIdSet leftBaseCols, rightBaseCols;
  leftExpr->findAll( ITM_BASECOLUMN, leftBaseCols, TRUE, TRUE );
  rightExpr->findAll( ITM_BASECOLUMN, rightBaseCols, TRUE, TRUE );

  SET(TableDesc *) * leftTables  = leftBaseCols.getAllTables();
  SET(TableDesc *) * rightTables = rightBaseCols.getAllTables();

  if (!(leftTables->entries() > 0))
    return FALSE;

  if (!(rightTables->entries() > 0))
    return FALSE;

  if (rightTables->contains(leftTables->at(0)))
    return FALSE;

  return TRUE;
}

// -----------------------------------------------------------------------
// BiRelat::applyEquiJoinExpr
//
// Computes a default selectivity for an equality predicate that involves
// columns on both sides (e.g. col1 = Fn(col2, col3)) as one over the
// larger of the two sides' UECs, and applies it to leftStatDesc.
//
// Parameters:
//   leftStatDesc  - histogram descriptor of the left child; the computed
//                   selectivity is applied to it
//   rightStatDesc - histogram descriptor of the right child; tested for
//                   emptiness before use
//   histograms    - passed to calculateMinMaxUecs when COMP_BOOL_108 is ON
//
// Two strategies, selected by COMP_BOOL_108:
//   ON  - ask both children for their min UEC via calculateMinMaxUecs;
//         fall back to HIST_DEFAULT_SEL_FOR_JOIN_EQUAL if neither child
//         can provide one.
//   OFF - use the total UECs of the supplied histograms, preferring a
//         real histogram over a fake one.
// In both strategies a side that contains an aggregate has its UEC
// multiplied by the COMP_FLOAT_6 fudge factor.
// -----------------------------------------------------------------------
void BiRelat::applyEquiJoinExpr(ColStatDescSharedPtr & leftStatDesc,
					  ColStatDescSharedPtr & rightStatDesc,
					  ColStatDescList & histograms)
{
  CostScalar defaultSel = csOne;
  // leftMaxUec / rightMaxUec only receive the max-UEC output of
  // calculateMinMaxUecs below; they are not used afterwards.
  CostScalar leftUec = csMinusOne, rightUec = csMinusOne, leftMaxUec = csMinusOne, rightMaxUec = csMinusOne;
  CostScalar fudgeFactorForAggFn ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_6));

  if(CmpCommon::getDefault(COMP_BOOL_108) == DF_ON)
  {
    // Both children are always asked, regardless of the first answer.
    NABoolean foundLeftUec = child(0)->calculateMinMaxUecs(histograms, leftUec, leftMaxUec);
    NABoolean foundRightUec = child(1)->calculateMinMaxUecs(histograms, rightUec, rightMaxUec);

    if(foundLeftUec || foundRightUec)
    {
      if (child(0)->containsAnAggregate())
	leftUec = (leftUec * fudgeFactorForAggFn).minCsOne();

      if (child(1)->containsAnAggregate())
	rightUec = (rightUec * fudgeFactorForAggFn).minCsOne();

      // selectivity = 1 / max(leftUec, rightUec), with the divisor clamped to >= 1
      defaultSel = csOne/(MAXOF(leftUec, rightUec)).minCsOne();
    }
    else
      defaultSel = CostPrimitives::getBasicCostFactor( HIST_DEFAULT_SEL_FOR_JOIN_EQUAL );
  }
  else
  {
    // rightLeafValues > 0 and leftLeafValues > 0 or <col1> = <col2>
    // col1 = Fn(col2, col3) OR Fn(col1, col2) = col3
    // selectivity is equal to 1/MAXUEC of the columns 
    // participating in the query
    // get histograms from right child. In case there is
    // more than one column in the right child, get the histogram 
    // with max UEC
    ColStatsSharedPtr leftColStat = leftStatDesc->getColStats();
    leftUec = leftColStat->getTotalUec();
    NABoolean leftColStatReal = !leftColStat->isOrigFakeHist();

    // If we reached here, it is guaranteed that we have histograms for the
    // left child. But, we cannot say anything for sure for the right child.
    // Hence check if there exists any column in the right side of the equality
    // predicates.

    ColStatsSharedPtr rightColStat;
    NABoolean rightColStatReal = FALSE;
    // NOTE(review): this declaration shadows the function-scope rightUec
    // declared at the top; within this else-branch only this inner
    // variable is read and written.
    CostScalar rightUec = csMinusOne;

    // if the right side has any column, get the histograms for the right child
    if(rightStatDesc)
    {
      // out of all these columns, pick the one with Max UEC. While doing
      // that it also makes sure that it is comparing the default UEC to
      // default UEC and actual UEC to actual one of both children
      rightColStat = rightStatDesc->getColStats();
      rightUec = rightColStat->getTotalUec();
      rightColStatReal = !rightColStat->isOrigFakeHist();
    } // if getColIndex for rightLeafValues

    // check for aggregate function for both children 
    // and adjust UECs accordingly
    // We would ideally have liked to compute selectivity for
    // aggregate functions by considering UEC for only those 
    // columns which are children of aggregate functions.
    // Because of time constraints and not being able to find
    // an inexpensive way to handle that, we are now taking the 
    // cardinality of the column with MAX UEC from the child
    // and, in case of an aggregate, multiplying it by a fudge factor.
    // Hence there could be cases where for predicates like 
    // "a + max(b) = c", we might pick up col 'a' as that has the
    // highest UEC and multiply the cardinality of that by the fudge 
    // factor. If columns 'a' and 'b' belong to the same table, 
    // or have already been joined, it would not matter which
    // column we pick up, but in some cases it might give poor estimates

    if (child(0)->containsAnAggregate())
      leftUec = (leftUec * fudgeFactorForAggFn).minCsOne();

    if (rightColStat && child(1)->containsAnAggregate())
      rightUec = (rightUec * fudgeFactorForAggFn).minCsOne();

    // To get the maxUec for selectivity, we don't want to compare
    // real UEC with the fake one. Hence, also check for fakeness
    // before comparing
    // NOTE(review): unlike the COMP_BOOL_108 branch above, the divisors
    // here are not clamped with minCsOne() unless an aggregate is
    // involved - confirm a total UEC below one cannot reach this point.

    if (leftColStatReal && !rightColStatReal)
      defaultSel = csOne/leftUec;
    else if (rightColStatReal && !leftColStatReal)
      defaultSel = csOne/rightUec;
    else
      defaultSel = csOne/MAXOF(leftUec, rightUec);
    }

  // Since the histograms have not been merged, we don't know which
  // one will be finally picked up later for parent node, based on
  // the characteristics output. Hence set the aggregate information
  // of all correctly. Rowcount and UEC for all will be done automatically
  // during synchronizeStats. appliedPreds is the only one that needs to be
  // correctly set

  // This will modify the rowcount, which should be done for only
  // one histogram. Remaining histograms will be synchronized later
  leftStatDesc->applySel( defaultSel );
}

// -----------------------------------------------------------------------
// BiRelat::applyLocalPredExpr
//
// Computes a default selectivity for an equal / not-equal predicate and
// applies it to leftStatDesc.
//
// Parameters:
//   leftStatDesc    - histogram descriptor of the left child; receives
//                     the selectivity through applySel()
//   rightStatDesc   - histogram descriptor of the right child; tested for
//                     emptiness before use
//   exprOpCode      - operator code of the enclosing relational
//                     expression (compared against REL_SCAN)
//   leftLeafValues  - leaf values of the left child
//   rightLeafValues - leaf values of the right child
//   maxSelectivity  - optional in/out; only updated on the ITM_EQUAL path
//                     over a non-fake histogram, where it is lowered to
//                     max-frequency / total-rows if that is smaller
//
// Three situations are distinguished:
//   1. The right side has no column, or exactly the same single column
//      as the left side: use the default selectivity for a fake
//      histogram, otherwise derive it from per-interval rows-per-value.
//   2. isSimpleComplexPredInvolved(): 1 / leftUec (complemented for <>).
//   3. Otherwise: join-style defaults based on UECs or cost factors.
// -----------------------------------------------------------------------
void BiRelat::applyLocalPredExpr(ColStatDescSharedPtr & leftStatDesc,
                                 ColStatDescSharedPtr & rightStatDesc,
                                 OperatorTypeEnum exprOpCode,
                                 ValueIdSet leftLeafValues,
                                 ValueIdSet rightLeafValues,
                                 CostScalar *maxSelectivity)
{
  CostScalar defaultSel = csOne;
  OperatorTypeEnum op = getOperatorType();

  ColStatsSharedPtr leftColStat = leftStatDesc->getColStats();
  CostScalar leftUec = leftColStat->getTotalUec();;
  
  // If we reached here, it is guaranteed that we have histograms for the
  // left child. But, we cannot say anything for sure for the right child.
  // Hence check if there exists any column in the right side of the equality
  // predicates.

  // rightColStat is only used to fetch rightUec below.
  ColStatsSharedPtr rightColStat;
  CostScalar rightUec = csMinusOne;

  // if the right side has any column, get the histograms for the right child
  if (rightStatDesc)
  {
    // out of all these columns, pick the one with Max UEC. While doing
    // that it also makes sure that it is comparing the default UEC to
    // default UEC and actual UEC to actual one of both children
    rightColStat = rightStatDesc->getColStats();
    rightUec = rightColStat->getTotalUec();
  } // if getColIndex for rightLeafValues

  // First Question: is the predicate of the form
  //   "<col> <op> <expression>"  or  "<expression> <op> <col>"?
  //
  // Earlier logic places stand-alone columns on the left, so look
  // for a histogram associated with the left-hand valueId.
  //
  // Of course, the trick here is that the following works even if
  // the left-hand ValueId isn't for a column.

  // There is only one column in the predicate or there is one 
  // column on the right hand side of the predicate and
  // this column is the same as the left side column, use Uec
  // of the column instead of using default statistics

  if  (( rightLeafValues.isEmpty() )  ||  
    ( (rightLeafValues.entries() == 1) && (leftLeafValues == rightLeafValues ) ) )
  { 
    // <col> <op> <constant> or <col> <op> <col>, 
    // where the left and the right col are the same
    // If this is a 'fake' histogram, the selectivity
    // assumed is the default selectivity associated
    // with the current predicate.  (But, don't apply
    // the same type of predicate multiple times.)
    if ( leftColStat->isFakeHistogram() )
    {
      defaultSel = (leftStatDesc->isSimilarPredicateApplied( op ) ?
				  csOne : this->defaultSel() );
    }
    else
    { 
      // not a 'fake histogram'
      // Determine the rowcount of the non-NULL value
      // with the greatest/least rowcount.
      HistogramSharedPtr histP    = leftColStat->getHistogram();
      const CostScalar & rowRedF = leftColStat->getRedFactor();

      const CostScalar & rowCountBeforePred = (leftColStat->getRowcount()).minCsOne();

      // Running maximum / minimum of "rows per distinct value" over all
      // intervals visited below. minRowCount starts at the total rowcount
      // so that "unchanged" can be detected after the loop.
      CostScalar maxRowCount = csOne;
      CostScalar minRowCount = rowCountBeforePred;
      CostScalar tmpRowCount;
      CostScalar uec;

      Interval iter = histP->getFirstInterval();

      while ( iter.isValid() && !iter.isNull() )
      {
	// uec must be at least 1 for these
	// calculations since we don't want to get
	// a huge blowup in rowcount
	if ( iter.getUec().isZero() )
	{
	  iter.next();
	  continue; // avoid divide-by-zero!
	}

	// Interval rowcount scaled by the histogram's reduction factor.
	CostScalar iRows = rowRedF * iter.getRowcount();

	// Never assume more distinct values than rows, and at least one.
	uec = (MINOF(iRows, iter.getUec())).minCsOne();

	tmpRowCount =
	  iRows / uec;

	if ( tmpRowCount > maxRowCount )
	  maxRowCount = tmpRowCount;

	if ( tmpRowCount < minRowCount )
	  minRowCount = tmpRowCount;

	iter.next();
      } // end while iter() is valid

      if ( op == ITM_EQUAL )
      {
	// Equality: assume the most frequent value is the one selected.
	defaultSel = maxRowCount / rowCountBeforePred;
	if (maxSelectivity) 
	  {
	    // maxSelectivity(x=?) == max frequency / total rows
	    *maxSelectivity = 
	      MINOF(maxRowCount / rowCountBeforePred,
		    *maxSelectivity);
	  }
      }
      else
      {
	// With ITM_NOT_EQUAL, and no Histogram, avoid
	// setting defaultSel to zero:
	CostScalar numer;
	CostScalar denom;

	if ( minRowCount == rowCountBeforePred )
	{
	  // minRowCount still has its initial value: use
	  // (N-1)/N with N = max(rowcount, 2).
	  numer = MAXOF( rowCountBeforePred, csTwo ) - csOne;
	  denom = MAXOF( rowCountBeforePred, csTwo );

	  defaultSel = numer / denom;
	}
	else
	{
	  // Inequality: assume the least frequent value is the one excluded.
	  numer = minRowCount;
	  denom = rowCountBeforePred;

	  defaultSel = csOne - ( numer / denom );
	  // maxSelectivity(x<>?) == 1.0
	  // which means do nothing here because 1.0 has
	  // already been set as the default maxSelectivity
	  // just before the estimateCardinality() call.
	}
      } // op == ITM_NOT_EQUAL
    }  // not a 'fake histogram'
  } // The Operand is a constant expression.
  else // i.e., NOT leafValues.isEmpty()
  {
    // <col1> <op> <col2>, or <col1 + col2> <op> <col3>
    // or <col1> <op> <col2 + col3>
    // The operand involves more than one column, which
    // makes this an equality join that we are not now
    // able to evaluate.
    // Note that the current predicate-based defaultSel
    // routine is not used in this case.....
    if(isSimpleComplexPredInvolved())
    {
      // 1 / leftUec with the UEC clamped to at least one; complemented
      // for not-equal.
      defaultSel = 1/leftUec.minCsOne().getValue();
      if( op == ITM_NOT_EQUAL)
	defaultSel = 1 - defaultSel.getValue();
    }
    else
    {
      if ( op == ITM_EQUAL )
      {
	// Don't reduce again if a similar predicate was already applied.
	if (leftStatDesc->isSimilarPredicateApplied( op ) )
	      defaultSel = csOne;
	else
	{
	  if ( (rightLeafValues.entries() == 1) &&
		(exprOpCode != REL_SCAN) )
	  {
	    // <col1> = <col2>
	    // we already know that the left side has one column. This is the case
	    // col1 Join col2

	    if(rightStatDesc)
	    {
		CostScalar maxUec = (MAXOF(leftUec, rightUec)).minCsOne();
		defaultSel = csOne/maxUec;
	    } // colStat for column found
	    else
	    {
	      // histogram does not exist; use default selectivity for Join equal
	      defaultSel = CostPrimitives::getBasicCostFactor( HIST_DEFAULT_SEL_FOR_JOIN_EQUAL );
	    }
	  } // rightLeafValueEntries = 1
	  else
	  {
	    // right side has more than one column, or it is a scan. 
	    // Use default join equal selectivity for <col1> = <col2, col3>
	    // and hist_no_stats_uec for scan
	    if (exprOpCode == REL_SCAN)
	      defaultSel = (1.0/CURRSTMT_OPTDEFAULTS->defNoStatsUec());
	    else
	      defaultSel = CostPrimitives::getBasicCostFactor( HIST_DEFAULT_SEL_FOR_JOIN_EQUAL );
	  }
	}
      }
      else
      { // op == ITM_NOT_EQUAL. Apply default selectivity
	if (leftStatDesc->isSimilarPredicateApplied( op ) )
	  defaultSel = csOne;
	else
	{
	  // Complement of the corresponding equality default.
	  if (exprOpCode == REL_SCAN)
	    defaultSel = 1 - (1.0/CURRSTMT_OPTDEFAULTS->defNoStatsUec() );
	  else
	      defaultSel = (1 - CostPrimitives::getBasicCostFactor( HIST_DEFAULT_SEL_FOR_JOIN_EQUAL ) );
	}
      }
    }
  } // NOT leafValue.isEmpty()
    // NOTE(review): the selectivity is applied to the left histogram even
    // when maxSelectivity is non-NULL - confirm callers expect that.
    leftStatDesc->applySel( defaultSel );
}

// -----------------------------------------------------------------------
// BiRelat::applyRangePredExpr
//
// Computes a default selectivity for a range predicate
// (<, <=, >, >=) and applies it to leftStatDesc.
//
// Parameters:
//   leftStatDesc    - histogram descriptor of the left child; receives
//                     the selectivity through applySel()
//   leftLeafValues  - leaf values of the left child
//   rightLeafValues - leaf values of the right child
//
// The starting point is selForRelativeRange(). When the right side has
// no column, or exactly the same single column as the left side, a
// result of csOne is replaced by the LIKE selectivity or by defaultSel(),
// unless a similar predicate has already been applied. In every other
// case (<col> <op> <col>) the result is HIST_DEFAULT_SEL_FOR_JOIN_RANGE,
// or csOne if a similar predicate has already been applied.
// -----------------------------------------------------------------------
void BiRelat::applyRangePredExpr(ColStatDescSharedPtr & leftStatDesc,
                                 ValueIdSet leftLeafValues,
                                 ValueIdSet rightLeafValues)
{
  ItemExpr * lhs = child(0)->getLeafValueIfUseStats();
  ItemExpr * rhs = child(1)->getLeafValueIfUseStats();

  const OperatorTypeEnum op = getOperatorType();

  // First Question: is the predicate of the form
  //   "<col> <op> <expression>"  or  "<expression> <op> <col>"?
  CostScalar sel = leftStatDesc->selForRelativeRange (op, lhs->getValueId(), rhs);

  // sel is one for range predicates on char or varchar column types.
  // If a similar predicate has already been applied to this histogram,
  // then we don't want to reduce the rowcount and uec further, so the
  // selectivity stays equal to one. The following predicates are said
  // to be similar:
  // 1. < and <=
  // 2. > and >=
  // 3. If the two range predicates are derived from a LIKE predicate

  // There is only one column in the predicate, or there is one column on
  // the right hand side of the predicate and this column is the same as
  // the left side column: use Uec of the column instead of using default
  // statistics.
  if  (( rightLeafValues.isEmpty() )  ||
       ( (rightLeafValues.entries() == 1) && (leftLeafValues == rightLeafValues ) ) )
  {
    // pred <col> operator <expression>
    if ( sel == csOne )
    {
      const NABoolean similarApplied =
        (leftStatDesc->derivOfLikeAndSimilarPredApp(this) ||
         leftStatDesc->isSimilarPredicateApplied( op ));

      // When a similar predicate was applied, sel simply stays csOne.
      if (!similarApplied)
      {
        if (derivativeOfLike())
          sel = getLikeSelectivity();
        else
          sel = defaultSel();
      }
    } // if sel == csOne
  }
  else
  {
    // not a leaf value. Predicate is <col> operator <col>
    sel =
      ( leftStatDesc->isSimilarPredicateApplied( op ) ?
        csOne : CostPrimitives::getBasicCostFactor( HIST_DEFAULT_SEL_FOR_JOIN_RANGE ) );
  } // end of join range

  leftStatDesc->applySel( sel );
}


// Estimates the UEC of a CASE node.
//
// Some of the date/time functions get transformed into CASE statements;
// for those the UEC is known up front and is assigned without evaluating
// the expression (DAYNAME: 7, MONTHNAME: 12, QUARTER: 4). A genuine CASE
// delegates to its first child. For any other original operator both
// outputs are set to csMinusOne and FALSE is returned.
NABoolean Case::calculateMinMaxUecs(ColStatDescList & histograms,
                                    CostScalar & minUec,
                                    CostScalar & maxUec)
{
  int fixedUec = 0;

  switch (origOpType())
  {
    case ITM_CASE:
      return child(0)->calculateMinMaxUecs(histograms, minUec, maxUec);

    case ITM_DAYNAME:
      fixedUec = 7;
      break;

    case ITM_MONTHNAME:
      fixedUec = 12;
      break;

    case ITM_QUARTER:
      fixedUec = 4;
      break;

    default:
      minUec = maxUec = csMinusOne;
      return FALSE;
  }

  // Only the fixed-cardinality date/time functions get here.
  minUec = maxUec = fixedUec;
  return TRUE;
}

// -----------------------------------------------------------------------
// IfThenElse::calculateMinMaxUecs
//
// Combines the UEC estimates of the two outcomes (child(1) and child(2))
// of an IF-THEN-ELSE node.
//
// Parameters:
//   histograms - passed through to the children
//   minUec     - out: if either outcome is a constant, the sum of the two
//                outcomes' min UECs; otherwise the smaller of the two
//   maxUec     - out: the sum of the two outcomes' max UECs
//
// Returns TRUE only if both outcomes could be estimated; otherwise FALSE
// and the outputs are left untouched.
// -----------------------------------------------------------------------
NABoolean IfThenElse::calculateMinMaxUecs(ColStatDescList & histograms,
					  CostScalar & minUec,
					  CostScalar & maxUec)
{
  CostScalar firstMinUec, secondMinUec, firstMaxUec, secondMaxUec;

  // Both outcomes are always evaluated, regardless of the first answer.
  NABoolean firstOutcomeResult = child(1)->calculateMinMaxUecs(histograms, firstMinUec, firstMaxUec);
  NABoolean secondOutcomeResult = child(2)->calculateMinMaxUecs(histograms, secondMinUec, secondMaxUec);

  if(!(firstOutcomeResult && secondOutcomeResult))
    return FALSE;

  if((child(1)->getOperatorType() == ITM_CONSTANT) || 
     (child(2)->getOperatorType() == ITM_CONSTANT))
    minUec = firstMinUec + secondMinUec;
  else
    minUec = MINOF(firstMinUec, secondMinUec);

  // The upper bound is built from the outcomes' *max* UECs. Summing the
  // min UECs here would ignore firstMaxUec / secondMaxUec entirely and
  // could yield a maxUec below the true upper bound.
  maxUec = firstMaxUec + secondMaxUec;
  return TRUE;
}

// Estimates the UEC of a SUBSTRING over a single column.
//
// The column's total UEC is scaled by (length / original string length);
// minUec is that scaled UEC and maxUec is the column's rowcount, each
// capped at 10^length.
//
// Returns FALSE (outputs untouched) when the node was not originally a
// SUBSTRING, when it does not reference exactly one VEG reference, when
// no histogram exists for that reference, or when the length operand
// (child(2) for arity 3, child(1) otherwise) is not a constant.
NABoolean Substring::calculateMinMaxUecs(ColStatDescList & histograms,
                                         CostScalar & minUec,
                                         CostScalar & maxUec)
{
  // Return if the function is not a SUBSTRING originally
  if (origOpType() != ITM_SUBSTR)
    return FALSE;

  // Exactly one column must be involved.
  ValueIdSet vegRefs;
  this->findAll(ITM_VEG_REFERENCE, vegRefs, TRUE, TRUE);
  if (vegRefs.entries() != 1)
    return FALSE;

  ValueId columnVid;
  vegRefs.getFirst(columnVid);

  CollIndex histIndex;
  if (!histograms.getColStatDescIndexForColumn(histIndex, columnVid))
    return FALSE;

  // Character limit of the string operand.
  const NAType *stringType = &child(0)->getValueId().getType();
  const CharType *stringCharType = (CharType *) stringType;
  double origLength = stringCharType->getStrCharLimit();

  // Constant operand that supplies the length.
  NABoolean negate;
  ConstValue *lengthConst = NULL;
  if (getArity() == 3)
    lengthConst = child(2)->castToConstValue(negate);
  else
    lengthConst = child(1)->castToConstValue(negate);

  if (!lengthConst)
    return FALSE;

  double length = convertInt64ToDouble(lengthConst->getExactNumericValue());

  ColStatDescSharedPtr statDesc = (histograms)[histIndex];
  CostScalar rowCount = statDesc->getColStats()->getRowcount();
  CostScalar uec = statDesc->getColStats()->getTotalUec();

  uec *= (length/origLength);
  minUec = MINOF(uec, pow(double(10), length));
  maxUec = MINOF(rowCount, pow(double(10), length));
  return TRUE;
}

// Covers all of the math functions:
//   Arithmetic functions    - ABS, CEILING, FLOOR
//   Geometric functions     - DEGREES, RADIANS
//   Trigonometric functions - ACOS, ASIN, ATAN, ATAN2, COS, COSH, SIN,
//                             SINH, TAN, TANH
//   Other math functions    - EXP, LOG, LOG10, POWER, SQRT
//
// A one-operand function takes over the UEC estimate of its operand;
// any other arity is not estimated and yields FALSE.
NABoolean MathFunc::calculateMinMaxUecs(ColStatDescList & histograms,
                                        CostScalar & minUec,
                                        CostScalar & maxUec)
{
  if (getArity() != 1)
    return FALSE;

  return child(0)->calculateMinMaxUecs(histograms, minUec, maxUec);
}

// In a modulus function the divisor stores the number of possible
// outcomes, so a constant divisor (child(1)) is used as both the min and
// the max UEC. Returns FALSE, leaving the outputs untouched, when the
// divisor is not a constant.
NABoolean Modulus::calculateMinMaxUecs(ColStatDescList & histograms,
                                       CostScalar & minUec,
                                       CostScalar & maxUec)
{
  NABoolean negate;
  ConstValue *divisorConst = child(1)->castToConstValue(negate);

  if (!divisorConst)
    return FALSE;

  double divisor = convertInt64ToDouble(divisorConst->getExactNumericValue());
  minUec = maxUec = divisor;
  return TRUE;
}

// DAYOFWEEK has a fixed UEC of 7; the histograms are not consulted.
// Always returns TRUE.
NABoolean DayOfWeek::calculateMinMaxUecs(ColStatDescList & histograms,
                                         CostScalar & minUec,
                                         CostScalar & maxUec)
{
  maxUec = 7;
  minUec = maxUec;
  return TRUE;
}

// Estimates the UEC of an EXTRACT.
//
// MONTH, DAY, HOUR, MINUTE and SECOND have fixed UECs (12, 31, 24, 60,
// 60). For the YEAR family (YEAR, YEARQUARTER, YEARMONTH, YEARWEEK) the
// histogram data is used to calculate how many years / quarters / months
// / weeks are encompassed between the boundaries of the histogram; this
// needs exactly one referenced column with a real (non-fake) histogram
// of type DATE or TIMESTAMP, otherwise FALSE is returned with the
// outputs untouched. Any other field sets both outputs to csMinusOne and
// returns FALSE.
NABoolean Extract::calculateMinMaxUecs(ColStatDescList & histograms,
                                       CostScalar & minUec,
                                       CostScalar & maxUec)
{
  // Approximate number of days covered by one value of the extracted
  // field; only set for the YEAR family.
  int daysPerValue = 0;

  switch (getExtractField())
  {
    case REC_DATE_MONTH:
      minUec = maxUec = 12;
      return TRUE;

    case REC_DATE_DAY:
      minUec = maxUec = 31;
      return TRUE;

    case REC_DATE_HOUR:
      minUec = maxUec = 24;
      return TRUE;

    case REC_DATE_MINUTE:
    case REC_DATE_SECOND:
      minUec = maxUec = 60;
      return TRUE;

    case REC_DATE_YEAR:
      daysPerValue = 365;
      break;

    case REC_DATE_YEARQUARTER_EXTRACT:
      daysPerValue = 91;
      break;

    case REC_DATE_YEARMONTH_EXTRACT:
      daysPerValue = 30;
      break;

    case REC_DATE_YEARWEEK_EXTRACT:
      daysPerValue = 7;
      break;

    default:
      minUec = maxUec = csMinusOne;
      return FALSE;
  }

  // ---- YEAR family from here on ----

  // Exactly one column must be involved.
  ValueIdSet vegRefs;
  this->findAll(ITM_VEG_REFERENCE, vegRefs, TRUE, TRUE);
  if (vegRefs.entries() != 1)
    return FALSE;

  ValueId columnVid;
  vegRefs.getFirst(columnVid);

  CollIndex histIndex;
  if (!histograms.getColStatDescIndexForColumn(histIndex, columnVid))
    return FALSE;

  ColStatDescSharedPtr statDesc = (histograms)[histIndex];
  ColStatsSharedPtr colStats = statDesc->getColStats();

  // The boundaries of a fake histogram carry no information.
  if (colStats->isOrigFakeHist())
    return FALSE;

  NAString typeName = colStats->getStatColumns()[0]->getType()->getTypeName();
  if (typeName != "DATE" && typeName != "TIMESTAMP")
    return FALSE;

  EncodedValue lowBound  = colStats->getMinValue();
  EncodedValue highBound = colStats->getMaxValue();

  // Number of days in the interval (1 day = 86400 sec) ...
  minUec = (highBound.getDblValue() - lowBound.getDblValue()) / 86400;

  // ... converted to the number of years / quarters / months / weeks.
  minUec /= daysPerValue;

  minUec = MIN_ONE_CS(ceil(minUec.getValue()));
  maxUec = minUec + 1; // interval may wrap around a year, quarter, etc.
  return TRUE;
}

// Estimates the UEC of a builtin function, by operator type:
//
//   ASCII                     - 256, or the operand's min UEC when that
//                               is smaller
//   CONCAT, REPLACE           - estimate of the first operand
//   CHAR_LENGTH, OCTET_LENGTH - character limit of the first operand
//   POSITION                  - character limit of the second operand
//   CAST, LOWER, UPPER,
//   CONVERT                   - estimate of the operand (arity 1 only)
//   TRIM                      - estimate of the second operand
//                               (arity 2 only)
//   timestamp-style functions
//   and SLEEP                 - csOne
//   anything else             - csMinusOne, returns FALSE
NABoolean BuiltinFunction::calculateMinMaxUecs(ColStatDescList & histograms,
                                               CostScalar & minUec,
                                               CostScalar & maxUec)
{
  switch (getOperatorType())
  {
    case ITM_ASCII:
    {
      // The UEC upper bound for ASCII is 256. If the UEC of the column
      // is less than 256, then the UEC will be equal to the UEC of the
      // column.
      minUec = maxUec = 256;

      CostScalar colMinUec = csOne, colMaxUec = csOne;
      if ((getArity() == 1) &&
          (child(0)->calculateMinMaxUecs(histograms, colMinUec, colMaxUec)) &&
          (colMinUec < 256))
        minUec = maxUec = colMinUec;

      return TRUE;
    }

    case ITM_CONCAT:
    case ITM_REPLACE:
      return child(0)->calculateMinMaxUecs(histograms, minUec, maxUec);

    case ITM_CHAR_LENGTH:
    case ITM_OCTET_LENGTH:
    {
      // For CHAR_LENGTH, OCTET_LENGTH, the first operand stores the
      // maximum length of the string. The UEC of these functions is the
      // same.
      const NAType *firstOperandType = &child(0)->getValueId().getType();
      const CharType *firstCharType = (CharType *) firstOperandType;
      minUec = maxUec = firstCharType->getStrCharLimit();
      return TRUE;
    }

    case ITM_POSITION:
    {
      // POSITION is similar to CHAR_LENGTH, OCTET_LENGTH except that the
      // maximum string length is stored in the second operand.
      const NAType *secondOperandType = &child(1)->getValueId().getType();
      const CharType *mainStrCharType = (CharType *) secondOperandType;
      minUec = maxUec = mainStrCharType->getStrCharLimit();
      return TRUE;
    }

    case ITM_CAST:
    case ITM_LOWER:
    case ITM_UPPER:
    case ITM_CONVERT:
      if (getArity() != 1)
        return FALSE;
      return child(0)->calculateMinMaxUecs(histograms, minUec, maxUec);

    case ITM_TRIM:
      if (getArity() != 2)
        return FALSE;
      return child(1)->calculateMinMaxUecs(histograms, minUec, maxUec);

    case ITM_CONVERTTIMESTAMP:
    case ITM_UNIX_TIMESTAMP:
    case ITM_SLEEP:
    case ITM_CURRENT_TIMESTAMP:
    case ITM_CURRENT_TIMESTAMP_RUNNING:
    case ITM_JULIANTIMESTAMP:
    case ITM_INTERNALTIMESTAMP:
      minUec = maxUec = csOne;
      return TRUE;

    default:
      minUec = maxUec = csMinusOne;
      return FALSE;
  }
}

// A sequence function is given a fixed UEC of csOne for both bounds;
// the histograms are not consulted. Always returns TRUE.
NABoolean ItmSequenceFunction::calculateMinMaxUecs(ColStatDescList & histograms,
                                                   CostScalar & minUec,
                                                   CostScalar & maxUec)
{
  maxUec = csOne;
  minUec = maxUec;
  return TRUE;
}

// A constant is given a fixed UEC of csOne for both bounds; the
// histograms are not consulted. Always returns TRUE.
NABoolean ConstValue::calculateMinMaxUecs(ColStatDescList & histograms,
                                          CostScalar & minUec,
                                          CostScalar & maxUec)
{
  maxUec = csOne;
  minUec = maxUec;
  return TRUE;
}

// -----------------------------------------------------------------------
//  Methods for class VEGPredicate
// -----------------------------------------------------------------------

// Unconditionally answers TRUE for a VEG predicate.
NABoolean VEGPredicate::synthSupportedOp() const
{
  const NABoolean isSupported = TRUE;
  return isSupported;
}

// -----------------------------------------------------------------------
// VEGPredicate::applyDefaultPred
//
// Applies a default reduction for this VEG predicate to the histogram
// entry that ColStatDescList::identifyMergeCandidates() designates as the
// "root" (left) entry for the predicate.
//
// The method works in one of two modes, selected by maxSelectivity:
//
//   maxSelectivity != NULL
//     Only an upper bound on the selectivity is computed. If the VEG
//     references a constant expression ("X = ?"), *maxSelectivity is
//     lowered to MINOF(maxFreq / rowcount, *maxSelectivity), where maxFreq
//     is histograms.getMaxFreq() of the first base column in the VEG for
//     which a frequency greater than csMinusOne is reported. If the VEG
//     references no constant expression ("X = Y"), *maxSelectivity is left
//     untouched. No row count reduction is applied in this mode.
//
//   maxSelectivity == NULL
//     The root entry is synchronized from its current row count to
//     rowcount / totalUec with ColStatDesc::SET_UEC_TO_ONE, and then
//     applySelIfSpecifiedViaHint() is invoked with the old row count.
//
// Parameters:
//   histograms      - list searched for entries associated with this
//                     predicate; its root entry may be modified
//   exprOpCode      - not referenced by this method
//   predValueId     - not referenced by this method
//   globalPredicate - not referenced by this method
//   maxSelectivity  - optional in/out bound; selects the mode (see above)
//
// Returns:
//   TRUE  if identifyMergeCandidates() found statistics for this predicate
//   FALSE otherwise (nothing was applied)
// -----------------------------------------------------------------------
NABoolean VEGPredicate::applyDefaultPred(ColStatDescList & histograms,
                                         OperatorTypeEnum exprOpCode,
                                         ValueId predValueId,
                                         NABoolean & globalPredicate,
                                         CostScalar *maxSelectivity)
{
  // leftColIndex receives the position of the left histogram whose
  // statistics will be used for computing selectivity.
  // In case the left child contains more than one column, it would be the
  // position of the histogram with max UEC amongst the left child.
  // It is given a defined starting value here; identifyMergeCandidates()
  // is expected to overwrite it whenever it reports success.
  CollIndex leftColIndex = 0;

  // could be a VEG predicate with no children
  CollIndexList statsToMerge(STMTHEAP);

  const OperatorTypeEnum op = getOperatorType();

  // Locate entries in this ColStatDescList that are associated with the
  // current VEG predicate. Without any, there is nothing to apply.
  if (!histograms.identifyMergeCandidates(this, leftColIndex, statsToMerge))
    return FALSE;

  // Statistics exist. Multi-column histograms no longer are running around
  // the system, but there are still predicates that reach here
  //
  //   e.g., predicates involving aggregates (count(*) = 10)
  //   e.g., predicates on host variables (?p = 10)
  //
  // ... probably many more

  if (op != ITM_VEG_PREDICATE)
  {
    // Only a transitive closure predicate is expected here.
    DCMPASSERT( FALSE ); // unexpected condition!
    return TRUE;
  }

  // ----------------------------------------------------------
  // This logic replicates a portion of the logic used in
  // ColStatDescList::applyVEGPred to determine the parts of this
  // VEGPred for which 'real' histogram manipulation was NOT done.
  //
  // Default selectivity should be applied *once* for *each* valid
  // join that couldn't be executed because of its multi-column
  // nature, but which would have otherwise been a valid join.
  // (So long as an involved column hasn't already appeared in a
  // default EQ-join.)
  // ----------------------------------------------------------
  // $$$ FCS_ONLY kludge -- this code should be reviewed and
  // $$$ reconsidered post-FCS
  //
  // We reach this point when we're applying a predicate that looks
  // like <col> = hostvar.
  //
  // We may reach this point in other instances, too.  For now, we're
  // not going to worry about that.
  //
  // For this situation, we set the rowcount to be rc/uec, and set
  // uec to be 1.  This reduction is exactly what you'd expect
  // when applying an eqpred with a hostvar.
  //
  ColStatDescSharedPtr rootStatDesc = (histograms)[leftColIndex];
  ColStatsSharedPtr rootColStats = rootStatDesc->getColStatsToModify();

  if (maxSelectivity)
  {
    // compute maxSelectivity; the histograms are not reduced in this mode
    VEG* veg = getVEG();
    const ValueIdSet & values = veg->getAllValues();

    // we must distinguish between "X=?" and "X=Y".
    // For "X=Y" (no constant referenced) maxSelectivity stays as it is,
    // i.e. maxSelectivity("X=Y") == 1.0
    ItemExpr *cExpr = NULL;
    if (values.referencesAConstExpr(&cExpr))
    {
      // veg is an "X=?" predicate
      //
      // maxFreq = maxFrequency(X) for VEGPred "X=?"
      // NB: "maxFreq = histograms.getMaxFreq(v);" may look
      // attractive here. But, beware: it causes many maxCard
      // tests to fail in compGeneral/test015 -- they get 0.
      CostScalar maxFreq = csMinusOne;
      for (ValueId v = values.init();
           values.next(v);
           values.advance(v))
      {
        // only base columns are asked for their max frequency
        if (v.getItemExpr()->getOperatorType() != ITM_BASECOLUMN)
          continue;

        maxFreq = histograms.getMaxFreq(v);
        if (maxFreq > csMinusOne)
          break; // first usable frequency wins
      }

      // maxSelectivity("X=?") == max frequency(X) / total rows
      if (maxFreq > csMinusOne &&
          rootColStats->getRowcount() > csZero)
        *maxSelectivity =
          MINOF(maxFreq / rootColStats->getRowcount(),
                *maxSelectivity);
    } // veg is an "X=?" predicate
  } // if (maxSelectivity)
  else // maxSelectivity == NULL
  {
    CostScalar oldUec  = rootColStats->getTotalUec();
    CostScalar oldRows = rootColStats->getRowcount();

    // NOTE(review): assumes oldUec is non-zero here -- confirm how
    // CostScalar division treats a zero divisor.
    CostScalar newRows = oldRows / oldUec;

    rootStatDesc->synchronizeStats(
      oldRows,
      newRows,
      ColStatDesc::SET_UEC_TO_ONE
      );

    // If user specified selectivity for this predicate, we need to make
    // adjustment in reduction to reflect that.
    rootStatDesc->applySelIfSpecifiedViaHint(this, oldRows);
  } // else maxSelectivity == NULL

  return TRUE;
}
