// core/sql/optimizer/TableDesc.cpp - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 /* -*-C++-*-
 **************************************************************************
 *
 * File:         TableDesc.C
 * Description:  A table descriptor
 * Created:      4/27/94
 * Language:     C++
 * Status:       Experimental
 *
 *
 **************************************************************************
 */

 #include "AllItemExpr.h"
 #include "AllRelExpr.h"
 #include "BindWA.h"
 #include "ComOperators.h"
 #include "ItemColRef.h"
 #include "ParNameLocList.h"
 #include "Sqlcomp.h"
 #include "ex_error.h"
 #include "Cost.h"      /* for lookups in the defaults table */
 #include "Analyzer.h"
 #include "HDFSHook.h"

 // -----------------------------------------------------------------------
 // Constructor (but note that much more useful stuff goes on in
 // static createTableDesc in BindRelExpr.C)
 // -----------------------------------------------------------------------
 TableDesc::TableDesc(BindWA *bindWA,
                      const NATable *table,
                      CorrName& corrName)
          	: table_(table),
                   indexes_(bindWA->wHeap()),
                   uniqueIndexes_(bindWA->wHeap()),
                   vertParts_(bindWA->wHeap()),
                   hintIndexes_(bindWA->wHeap()),
                   colStats_(bindWA->wHeap()),
                   corrName_("",bindWA->wHeap()),
                   analysis_(NULL)
 {
   // Resolve the caller's correlation name against the default schema
   // first (this modifies the caller's object), then keep a copy of it.
   corrName.applyDefaults(bindWA, bindWA->getDefaultSchema());
   corrName_ = corrName;

   // No hints and no compressed histograms yet; row count bounds start
   // at their widest range.
   histogramsCompressed_ = FALSE;
   selectivityHint_ = NULL;
   cardinalityHint_ = NULL;
   minRC_ = csOne;
   maxRC_ = COSTSCALAR_MAX;

   // Trigger temp tables skip the name location fix-up below.
   if (corrName.getSpecialType() == ExtendedQualName::TRIGTEMP_TABLE)
     return;

   // Fix up the name location list to help with the computing
   // of view text, check constraint search condition text, etc.
   ParNameLocList *nameLocations = bindWA->getNameLocListPtr();
   if (!nameLocations)
     return;

   ParNameLoc *nameLoc =
     nameLocations->getNameLocPtr(corrName_.getNamePosition());
   if (!nameLoc)
     return;

   // Only fill in the expanded name if nobody has set one already.
   if (nameLoc->getExpandedName(FALSE).isNull())
     nameLoc->setExpandedName(
       corrName_.getQualifiedNameObj().getQualifiedNameAsAnsiString());
 }


 // -----------------------------------------------------------------------
 // Add a CheckConstraint to the TableDesc.
 //
 // A table check constraint "CHECK (pred)" evaluates as
 // "WHERE (pred) IS NOT FALSE" (i.e. TRUE or NULL);
 // see ANSI 4.10 and in particular 11.21 GR 5+6.
 //
 // A view check constraint is the WHERE clause of a WITH CHECK OPTION view;
 // it must evaluate to TRUE:
 // "WHERE pred" (FALSE or NULL *fails*).
 //
 // Note that if the pred is "IS NOT NULL" (certainly a common CHECK pred),
 // we can optimize the run-time a tiny bit by not having to use a BoolVal
 // (because "IS NOT NULL" only returns TRUE or FALSE, never UNKNOWN/NULL).
 //
 // This method should be kept in synch with NAColumn::getNotNullViolationCode.
 // -----------------------------------------------------------------------
 void TableDesc::addCheckConstraint(BindWA *bindWA,
                                    const NATable *naTable,
                                    const CheckConstraint *constraint,
                                    ItemExpr *constrPred)
 {
   // Leaf taken when the constraint is satisfied.
   BoolVal *passLeaf = new (bindWA->wHeap()) BoolVal(ITM_RETURN_TRUE);

   // Leaf taken when the constraint is violated; its SQLCODE is chosen
   // below according to the kind of constraint.
   RaiseError *failLeaf = new (bindWA->wHeap())
     RaiseError(0,
                constraint->getConstraintName().getQualifiedNameAsAnsiString(),
                naTable->getTableName().getQualifiedNameAsAnsiString(),
                bindWA->wHeap());

   ItemExpr *guardedPred = NULL;

   if (constraint->isViewWithCheckOption())
     {
       // View WITH CHECK OPTION: the predicate itself is the IF condition,
       // so anything other than TRUE takes the error leaf.
       failLeaf->setSQLCODE(constraint->isTheCascadingView()
                            ? EXE_CHECK_OPTION_VIOLATION            // cascadING
                            : EXE_CHECK_OPTION_VIOLATION_CASCADED); // cascadED
       guardedPred =
         new (bindWA->wHeap()) IfThenElse(constrPred, passLeaf, failLeaf);
     }
   else if (constrPred->isISNOTNULL())
     {
       // IS NOT NULL never yields UNKNOWN, so no IS FALSE wrapper is needed.
       failLeaf->setSQLCODE(EXE_TABLE_CHECK_CONSTRAINT);
       guardedPred =
         new (bindWA->wHeap()) IfThenElse(constrPred, passLeaf, failLeaf);
     }
   else
     {
       // General table CHECK: raise the error only when the predicate
       // IS FALSE, so TRUE and NULL both pass.
       ItemExpr *predIsFalse =
         new (bindWA->wHeap()) UnLogic(ITM_IS_FALSE, constrPred);
       failLeaf->setSQLCODE(EXE_TABLE_CHECK_CONSTRAINT);
       guardedPred =
         new (bindWA->wHeap()) IfThenElse(predIsFalse, failLeaf, passLeaf);
     }

   // IfThenElse only works if Case is its parent.
   ItemExpr *caseExpr = new (bindWA->wHeap()) Case (NULL, guardedPred);

   caseExpr->bindNode(bindWA);
   CMPASSERT(!bindWA->errStatus());

   // Remember the bound expression as one of this table's check constraints.
   checkConstraints_.insert(caseExpr->getValueId());

 } // TableDesc::addCheckConstraint

 // TableDesc::isKeyIndex()
 // Parameter is a secondary index on the table. Checks whether every key
 // column of the secondary index is also a key column of the table's
 // clustering index. Returns TRUE if so, FALSE otherwise.
 NABoolean TableDesc::isKeyIndex(const IndexDesc * idesc) const
 {
   // Base-column ids of the clustering index key.
   ValueIdSet clusteringKeyCols = clusteringIndex_->getIndexKey();
   ValueIdSet clusteringKeyBaseIds = clusteringKeyCols.convertToBaseIds();

   // Key columns of the index being tested.
   ValueIdSet candidateKeyCols = idesc->getIndexKey();

   for (ValueId keyId = candidateKeyCols.init();
        candidateKeyCols.next(keyId);
        candidateKeyCols.advance(keyId))
   {
     // Each key entry is treated as an IndexColumn whose definition is a
     // BaseColumn; the casts are unchecked.
     IndexColumn *indexCol = (IndexColumn *) keyId.getItemExpr();
     BaseColumn *baseCol =
       (BaseColumn *) indexCol->getDefinition().getItemExpr();

     // One key column outside the clustering key is enough to say no.
     if (!clusteringKeyBaseIds.contains(baseCol->getValueId()))
       return FALSE;
   }

   return TRUE;
 }

 // this method sets the primary key columns. It goes through all the columns
 // of the table, and collects the columns which are marked as primary keys
 void TableDesc::setPrimaryKeyColumns()
 {
   ValueIdSet primaryColumns;

   for ( CollIndex j = 0 ; j < colList_.entries() ; j++ )
     {

       ValueId valId = colList_[j];

       NAColumn *column = valId.getNAColumn();

       if ( column->isPrimaryKey() )
       {
 	primaryColumns.insert(valId) ;
         // mark column as referenced for histogram, as we may need its histogram
         // during plan generation
         if ((column->isUserColumn() || column->isSaltColumn() ) &&
             (column->getNATable()->getSpecialType() == ExtendedQualName::NORMAL_TABLE) )
               column->setReferencedForMultiIntHist();
       }
     }

   primaryKeyColumns_ = primaryColumns;
 }

 // -----------------------------------------------------------------------------
 // NABoolean TableDesc::isSpecialObj() returns TRUE if the table is an internal
 // table such as HISTOGRM, HISTINTS, DESCRIBE__, a trigger temp table, or an
 // SMD, UMD, or MVUMD table.
 // One of its uses is in getTableColStats, where we do not want the compiler
 // to print the "no stats" warning for such tables.
 // -----------------------------------------------------------------------------
 NABoolean TableDesc::isSpecialObj()
 {
   const NATable *tbl = getNATable();

   // Metadata-style tables and trigger temp tables are always special.
   if (tbl->isUMDTable())      return TRUE;
   if (tbl->isSMDTable())      return TRUE;
   if (tbl->isMVUMDTable())    return TRUE;
   if (tbl->isTrigTempTable()) return TRUE;

   // Otherwise decide by the object name of the correlation name.
   const NAString &objName =
     getCorrNameObj().getQualifiedNameObj().getObjectName();

   if (objName == "DESCRIBE__")  // for non_dml statements such as showddl etc.
     return TRUE;
   if (objName == "HISTOGRM")    // following are used during update stats
     return TRUE;
   if (objName == "HISTINTS")
     return TRUE;

   return FALSE;
 }


 // -----------------------------------------------------------------------
 // TableDesc::getUserColumnList()
 // -----------------------------------------------------------------------
 void TableDesc::getUserColumnList(ValueIdList &columnList) const
 {
   // Append every user column of this table, in column-list order,
   // to the caller's list (which is not cleared first).
   for (CollIndex colPos = 0; colPos < colList_.entries(); colPos++)
   {
     ValueId colId = colList_[colPos];
     NAColumn *naCol = colId.getNAColumn();

     if (!naCol->isUserColumn())
       continue;

     columnList.insert(colId);
   }
 }

 // -----------------------------------------------------------------------
 // TableDesc::getSystemColumnList()
 // -----------------------------------------------------------------------
 void TableDesc::getSystemColumnList(ValueIdList &columnList) const
 {
   // Append every system column of this table, in column-list order,
   // to the caller's list (which is not cleared first).
   for (CollIndex colPos = 0; colPos < colList_.entries(); colPos++)
   {
     ValueId colId = colList_[colPos];
     NAColumn *naCol = colId.getNAColumn();

     if (!naCol->isSystemColumn())
       continue;

     columnList.insert(colId);
   }
 }


 // -----------------------------------------------------------------------
 // TableDesc::getIdentityColumn()
 // -----------------------------------------------------------------------
 void TableDesc::getIdentityColumn(ValueIdList &columnList) const
 {
   for (CollIndex colPos = 0; colPos < colList_.entries(); colPos++)
   {
     ValueId colId = colList_[colPos];
     NAColumn *naCol = colId.getNAColumn();

     if (!naCol->isIdentityColumn())
       continue;

     // Stop at the first match, as there can only be one
     // Identity column per table.
     columnList.insert(colId);
     return;
   }
 }


 NABoolean TableDesc::isIdentityColumnGeneratedAlways(NAString * value) const
 {
   // Reports whether this table has an IDENTITY column whose default class
   // is GENERATED ALWAYS AS IDENTITY. Never reports TRUE when the object
   // type of the table is COM_INDEX_OBJECT. If 'value' is non-NULL it
   // receives the name of the matching column.

   NABoolean found = FALSE;

   // The scan does not stop at the first match; if several columns
   // qualify, 'value' ends up holding the name of the last one.
   for (CollIndex colPos = 0; colPos < colList_.entries(); colPos++)
   {
     ValueId colId = colList_[colPos];
     NAColumn *naCol = colId.getNAColumn();

     if (naCol->isIdentityColumnAlways() &&
         getNATable()->getObjectType() != COM_INDEX_OBJECT)
     {
       if (value != NULL)
         *value = naCol->getColName();
       found = TRUE;
     }
   }

   return found;
 }

 NABoolean TableDesc::hasIdentityColumnInClusteringKey() const
 {
   // Returns TRUE as soon as one clustering key column resolves to an
   // NAColumn that is an identity column; FALSE if none does.
   ValueIdSet clusteringKeyCols = clusteringIndex_->getIndexKey();

   for (ValueId keyId = clusteringKeyCols.init();
        clusteringKeyCols.next(keyId);
        clusteringKeyCols.advance(keyId))
   {
     NAColumn *naCol = keyId.getNAColumn();

     // Key entries with no NAColumn are skipped.
     if (naCol == NULL)
       continue;

     if (naCol->isIdentityColumn())
       return TRUE;
   }

   return FALSE;
 }

 // -----------------------------------------------------------------------
 // Given a column list providing identifiers for columns of this table,
 // this method returns a list of VEG expressions and/or base columns that
 // show the equivalence of base columns with index columns.
 // -----------------------------------------------------------------------
 void TableDesc::getEquivVEGCols (const ValueIdList& columnList,
 				 ValueIdList &VEGColumnList) const
 {
   for (CollIndex i=0; i < columnList.entries(); i++)
     VEGColumnList.insert(getEquivVEGCol(columnList[i]));
 }

 void TableDesc::getEquivVEGCols (const ValueIdSet& columnSet,
 				 ValueIdSet &VEGColumnSet) const
 {
   for (ValueId v=columnSet.init();
        columnSet.next(v);
        columnSet.advance(v))
     VEGColumnSet += getEquivVEGCol(v);
 }

 // Maps one column of this table to the entry at the same column number
 // in the table's column VEG list.
 //
 // column: a value id that castToBaseColumn() resolves to a base column
 //         belonging to this TableDesc.
 // Returns the value id stored in getColumnVEGList() at that column number.
 // Asserts if the id does not resolve to a base column, or if the base
 // column belongs to a different TableDesc.
 ValueId TableDesc::getEquivVEGCol (const ValueId& column) const
 {
   BaseColumn *bc = column.castToBaseColumn();

   // Fail with an assertion instead of dereferencing a null pointer if
   // the id could not be resolved to a base column.
   CMPASSERT(bc != NULL);
   CMPASSERT(bc->getTableDesc() == this);
   return getColumnVEGList()[bc->getColNumber()];
 }

 // -----------------------------------------------------------------------
 // Statistics stuff
 // -----------------------------------------------------------------------
 // Returns the column statistics descriptor list (colStats_) for this table,
 // building it on first use.
 //
 // Subsequent calls return the cached list, compressing the histograms for
 // the current query first if that has not been done yet and COMP_BOOL_18
 // is not ON.
 //
 // The first call:
 //   1. fetches the statistics from the NATable (fake ones if none exist),
 //   2. creates one ColStatDesc per ColStats in colStats_,
 //   3. attaches multi-column UEC (and optionally MC skew) information,
 //   4. sets the UpStatsNeeded flag where applicable,
 //   5. enforces internal consistency of the list,
 //   6. compresses the histograms for the current query.
 const ColStatDescList &TableDesc::getTableColStats()
 {
     // HIST_NO_STATS_UEC can never be greater than HIST_NO_STATS_ROWCOUNT.
     // If the customer has done an illegal setting, ignore that, and set
     // it to maximum permissible value

     if (CURRSTMT_OPTDEFAULTS->defNoStatsUec() > CURRSTMT_OPTDEFAULTS->defNoStatsRowCount())
     {
       CURRSTMT_OPTDEFAULTS->setNoStatsUec(CURRSTMT_OPTDEFAULTS->defNoStatsRowCount());
     }

     // Fast path: the descriptors were already built by an earlier call.
     if (colStats_.entries() > 0)
     {
       if (!areHistsCompressed() && (CmpCommon::getDefault(COMP_BOOL_18) != DF_ON) )
       {
 	// compress histograms based on query predicates
 	compressHistogramsForCurrentQuery();
       }
       return colStats_;
     }

     // For each ColStat, create a ColStat descriptor.
     // table_ is a pointer to const; the cast removes the const so that
     // the non-const statistics accessors can be called.
     StatsList &stats = ((NATable *)table_)->getStatistics() ;

     // if for some reason, no histograms were returned by update statistics
     // generate fake histograms for the table
     // NOTE(review): 'stats' is a reference, so this assigns INTO the list
     // returned by getStatistics() rather than rebinding the reference --
     // confirm that this is the intent.

     if ( stats.entries() == 0 )
        stats = ((NATable *)table_)->generateFakeStats();

     const NAColumnArray &columnList = ((NATable *)table_)->getNAColumnArray();
     const ValueIdList & tableColList = getColumnList();
     colStats_.insertByPosition(stats, columnList, tableColList);

     // done creating a ColStatDesc for each ColStats;
     // ==> now store the multi-column uec information
     MultiColumnUecList * groupUecs = new (CmpCommon::statementHeap())
       MultiColumnUecList (stats, getColumnList()) ;
     // NOTE(review): stats[0] is read here before anything has verified
     // that the list is non-empty; the entries assertion further down
     // checks colStats_, not stats.
     CostScalar rowcount = stats[0]->getRowcount();
     groupUecs->initializeMCUecForUniqueIndxes(*this, rowcount);

     colStats_.setUecList (groupUecs) ;

     // Multi-column skewed value lists are attached only when the
     // USTAT_COLLECT_MC_SKEW_VALUES default is ON.
     if (CmpCommon::getDefault(USTAT_COLLECT_MC_SKEW_VALUES) == DF_ON)
     {
        MultiColumnSkewedValueLists* mcSkewedValueLists= new (CmpCommon::statementHeap())
          MultiColumnSkewedValueLists(stats, getColumnList()) ;
        colStats_.setMCSkewedValueLists (mcSkewedValueLists) ;
     }

     // -------------------------------------------------------------
     // set the UpStatsNeeded flag
     // The row count threshold comes from HIST_ROWCOUNT_REQUIRING_STATS,
     // or is 1 when ustat automation is enabled.
     CostScalar needStatsRowcount =
       CostPrimitives::getBasicCostFactor(HIST_ROWCOUNT_REQUIRING_STATS) ;
     if (CURRSTMT_OPTDEFAULTS->ustatAutomation()) needStatsRowcount = 1;

     CMPASSERT ( colStats_.entries() > 0 ) ;  // must have at least one histogram!
     CostScalar tableRowcount = colStats_[0]->getColStats()->getRowcount() ;

     if ( ( tableRowcount >= needStatsRowcount ) &&
 	 !(isSpecialObj()) )  // UpStatsNeeded flag is used for 6007 warning,
 			     // we do not want to give this warning for
 			     // smdTables, umdTables, MVUmd tables and other special tables
       {
         for ( CollIndex i = 0 ; i < colStats_.entries() ; i++ )
           colStats_[i]->getColStatsToModify()->setUpStatsNeeded (TRUE) ;
       }
     // -------------------------------------------------------------

     // ENFORCE ROWCOUNT!  When we read them in, all stats from the same
     // table should report the same rowcount.  Currently there's a
     // problem with SYSKEY histograms having a default rowcount value --
     // which brings up the following question: are SYSKEY histograms
     // ever used in histogram synthesis?
     // The "no stats" warning is suppressed for special objects.
     NABoolean printNoStatsWarning;
     if (isSpecialObj() )
   printNoStatsWarning = FALSE;
     else
   printNoStatsWarning = TRUE;
     colStats_.enforceInternalConsistency(0,colStats_.entries(), printNoStatsWarning) ;

   /*  Estimate index levels based on row count, row size and key size of the index.
   Estimation will be based upon the following assumptions:

   a. There is no slack in data blocks. Slack is  the percentage in the  data block
   that is  kept empty for future inserts into  the block. For  sequential inserts
   DP2  does not leave  any slack in data blocks  but inserts in between  rows
   causes DP2 to  move following rows in the block into a new block.

   b. DP2 encodes and optimizes key storage. Basically it will keeps the portion of
   the key that is  necessary to distinguish the  blocks in next level  of index or
   data blocks. We will assume that the whole key is being used.

   c.  Data is  uniformly distributed  among all  the partitions.  Obviously it  is
   possible that one the partitions has more data thus more data blocks leading  to
   more index level than the other ones.
   */
   // NOTE(review): no index-level estimation code follows the comment above
   // in this function; the comment looks stale -- confirm before relying on it.

   // compress histograms based on query predicates
   if (CmpCommon::getDefault(COMP_BOOL_18) != DF_ON)
     compressHistogramsForCurrentQuery();

   return colStats_;
 }


 ValueIdSet TableDesc::getLocalPreds()
 {
   // Start from an empty set; it is returned unchanged when this table
   // has no TableAnalysis.
   ValueIdSet preds;
   preds.clear();

   // The local predicates are kept by the table's TableAnalysis, if any.
   const TableAnalysis *analysis = getTableAnalysis();
   if (analysis != NULL)
     preds = analysis->getLocalPreds();

   return preds;
 }

 // Is there any local predicate whose column histogram is fake or was built
 // from a small sample?
 NABoolean TableDesc::isAnyHistWithPredsFakeOrSmallSample(const ValueIdSet &localPreds)
 {
   // With no local predicates there is nothing to inspect.
   if (localPreds.isEmpty())
     return FALSE;

   const ColStatDescList &tableStats = getTableColStats();

   // Look up the column statistics behind each predicate in turn.
   for (ValueId predId = localPreds.init();
        localPreds.next(predId);
        localPreds.advance(predId))
   {
     ColStatsSharedPtr predStats =
       tableStats.getColStatsPtrForPredicate(predId);

     // NOTE(review): a predicate with no statistics ends the whole scan
     // with FALSE, so later predicates are never examined. Confirm that
     // skipping to the next predicate is not what was intended.
     if (predStats == NULL)
       return FALSE;

     if (predStats->isOrigFakeHist())
       return TRUE;

     if (predStats->isSmallSampleHistogram())
       return TRUE;
   }

   return FALSE;
 }

 // This method computes the base selectivity for this table. It is defined as
 // the cardinality after applying all local predicates (with empty input
 // logical properties and without hints) divided by the base row count.

 CostScalar Scan::computeBaseSelectivity() const
 {
   // Numerator: scan row count without hints, taken from the output
   // logical properties for the global empty input logical property.
   CostScalar unhintedScanRows =
     getGroupAttr()->outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP))
                   ->getColStats().getScanRowCountWithoutHint();
   double rowsAfterLocalPreds = unhintedScanRows.getValue();

   // Denominator: row count of the table's first column statistics entry.
   double tableRows =
     getTableDesc()->tableColStats()[0]->getColStats()->getRowcount().getValue();

   // both the minimum and the base row count have to be minimum 1.
   // This is ensured in the called routines. So no need to check here.
   double selectivity = rowsAfterLocalPreds / tableRows;
   return selectivity;
 }

 // This method computes the ratio of selectivity obtained with and without hint
 // and sets that in the cardinalityHint

 void
 TableDesc::setBaseSelectivityHintForScan(CardinalityHint *cardHint,
                                          CostScalar baseSelectivity)
 {
   // cardinalityHint is minimum one. This is checked at the time of setting
   // it in the tableDesc. So, is the baseRowCount
   const double hintedRows = cardHint->getScanCardinality().getValue();
   CostScalar tableRows =
     tableColStats()[0]->getColStats()->getRowcount().getValue();

   // Selectivity implied by the hint: hinted rows over base rows.
   const double hintedSelectivity = hintedRows / tableRows.getValue();

   // The proportion is the exponent p with baseSelectivity^p equal to the
   // hinted selectivity.
   // selectivityProportion becomes invalid if selectivityFactor is
   // less than baseSelectivity. That causes an assertion failure
   // This was never caught earlier, maybe because we never tested hints
   // with OR predicates.
   // NOTE(review): log(baseSelectivity) is 0 when baseSelectivity is 1.0,
   // which makes the quotient infinite or NaN -- confirm callers exclude it.
   const double proportion =
     log(hintedSelectivity) / log(baseSelectivity.getValue());

   cardHint->setBaseScanSelectivityFactor(proportion);
   cardHint->setScanSelectivity(hintedSelectivity);
   setCardinalityHint(cardHint);
 }

 // This method computes the ratio of selectivity obtained with and without hint
 // and sets that in the selectivityHint

 void
 TableDesc::setBaseSelectivityHintForScan(SelectivityHint *selHint,
                                          CostScalar baseSelectivity)
 {
   const double hintedSelectivity = selHint->getScanSelectivityFactor();

   // The proportion is the exponent p with baseSelectivity^p equal to the
   // hinted selectivity. A hinted selectivity of zero has no logarithm,
   // so the proportion is fixed at 1.0 in that case.
   // NOTE(review): log(baseSelectivity) is 0 when baseSelectivity is 1.0,
   // which makes the quotient infinite or NaN -- confirm callers exclude it.
   const double proportion =
     (hintedSelectivity == 0)
       ? 1.0
       : log(hintedSelectivity) / log(baseSelectivity.getValue());

   selHint->setBaseScanSelectivityFactor(proportion);
   setSelectivityHint(selHint);
 }


 // -----------------------------------------------------------------------
 // Print function for TableDesc
 // -----------------------------------------------------------------------
 void TableDesc::print(FILE* ofd, const char* indent, const char* title)
 {
 #ifndef NDEBUG
   BUMP_INDENT(indent);
   cout << title << " " << this << " NATable=" << (void*)table_
 	<< " ix=" << indexes_.entries() << "," << clusteringIndex_
 	<< " stat=" << colStats_.entries() << endl;
   for (CollIndex i = 0; i < indexes_.entries(); i++)
     indexes_[i]->print(ofd, indent, "TableDesc::indexes");
   clusteringIndex_->print(ofd, indent, "TableDesc::clusteringIndex_");
   corrName_.print(ofd, indent);
   colList_.print(ofd, indent, "TableDesc::colList_");
   colVEGList_.print(ofd, indent, "TableDesc::colVEGList_");
   cout << "cnstrnt=" << checkConstraints_.entries() << endl << endl;

 #endif
 } // TableDesc::print()

 // -----------------------------------------------------------------------
 // Print function for TableDescList
 // -----------------------------------------------------------------------
 void TableDescList::print(FILE* ofd, const char* indent, const char* title)
 {
 #ifndef NDEBUG
   BUMP_INDENT(indent);

   for (CollIndex i = 0; i < entries(); i++)
     {
       fprintf(ofd,"%s%s[%2d] = (%p)\n",NEW_INDENT,title,i,at(i));
       // at(i)->print(ofd,indent);
     }
 #endif
 } // TableDescList::print()
 CardinalityHint::CardinalityHint(CostScalar scanCardinality)
 {
   // Constructor without local predicates: the predicate set starts empty.
   localPreds_.clear();

   // Keep the hinted scan cardinality.
   scanCardinality_ = scanCardinality;

   // Both selectivities start at -1.0.
   scanSelectivity_ = -1.0;
   baseSelectivity_ = -1.0;
 }

 // constructor defined with local predicates
 CardinalityHint::CardinalityHint(CostScalar scanCardinality,
                                  const ValueIdSet & localPreds)
 {
   // Keep a copy of the supplied local predicates.
   localPreds_ = localPreds;

   // Keep the hinted scan cardinality.
   scanCardinality_ = scanCardinality;

   // Both selectivities start at -1.0.
   scanSelectivity_ = -1.0;
   baseSelectivity_ = -1.0;
 }
 SelectivityHint::SelectivityHint(double selectivityFactor)
 {
   // The predicate set starts empty.
   localPreds_.clear();

   // The factor is stored as given; unlike setScanSelectivityFactor()
   // this constructor does not cap it at 1.0.
   selectivityFactor_ = selectivityFactor;

   // The base selectivity starts at -1.0.
   baseSelectivity_ = -1.0;
 }

 void SelectivityHint::setScanSelectivityFactor (double selectivityFactor)
 {
   // This method is called only for selectivityFactor >= 0.0, so only the
   // upper end needs capping: values above 1.0 are stored as 1.0.
   selectivityFactor_ = (selectivityFactor > 1.0) ? 1.0 : selectivityFactor;
 }

 CostScalar
 TableDesc::getBaseRowCntIfUniqueJoinCol(const ValueIdSet &joinedCols)

 {
   // Collect this table's user columns as a set.
   ValueIdList userColList;
   getUserColumnList(userColList);
   ValueIdSet userColSet(userColList);

   // Restrict the joined columns to those that belong to this table. The
   // intersection is taken on a copy so the caller's set is not touched.
   ValueIdSet joinedColsScratch(joinedCols);
   ValueIdSet joinColsOfThisTable = joinedColsScratch.intersect(userColSet);

   // csMinusOne is the answer both when none of the join columns belongs
   // to this table and when they do not constitute a unique index.
   if (joinColsOfThisTable.isEmpty() ||
       !joinColsOfThisTable.doColumnsConstituteUniqueIndex(this))
     return csMinusOne;

   // Otherwise return the row count of the first column statistics entry.
   return tableColStats()[0]->getColStats()->getRowcount();

 } // TableDesc::getBaseRowCntIfUniqueJoinCol


 ValueIdSet TableDesc::getComputedColumns(NAColumnBooleanFuncPtrT fptr)
 {
   // Returns the base columns of the clustering key for which the given
   // NAColumn boolean member function returns true.
   ValueIdSet matches;

   const IndexDesc *clustering = getClusteringIndex();

   for (CollIndex keyPos = 0;
        keyPos < clustering->getIndexKey().entries();
        keyPos++)
     {
       ItemExpr *keyExpr = clustering->getIndexKey()[keyPos].getItemExpr();

       // An index column is replaced by the expression that defines it.
       if (keyExpr->getOperatorType() == ITM_INDEXCOLUMN)
         keyExpr = ((IndexColumn *) keyExpr)->getDefinition().getItemExpr();

       // At this point the key entry must be a base column.
       CMPASSERT(keyExpr->getOperatorType() == ITM_BASECOLUMN);

       BaseColumn *baseCol = (BaseColumn *) keyExpr;
       NAColumn *naCol = baseCol->getNAColumn();

       // Call the supplied member function on the column.
       if ((naCol->*fptr)())
         matches += keyExpr->getValueId();
     }

   return matches;
 }


 ValueIdSet TableDesc::getSaltColumnAsSet()
 {
   // Clustering key columns for which NAColumn::isSaltColumn() holds.
   NAColumnBooleanFuncPtrT isSalt = &NAColumn::isSaltColumn;
   return getComputedColumns(isSalt);
 }

 ValueIdSet TableDesc::getDivisioningColumns()
 {
   // Clustering key columns for which NAColumn::isDivisioningColumn() holds.
   NAColumnBooleanFuncPtrT isDivisioning = &NAColumn::isDivisioningColumn;
   return getComputedColumns(isDivisioning);
 }

 // compress the histograms based on query predicates on this table
 void TableDesc::compressHistogramsForCurrentQuery()
 {

   // if there are some column statistics
   if ((colStats_.entries() != 0) &&
       (table_) &&
       (table_->getExtendedQualName().getSpecialType() == ExtendedQualName::NORMAL_TABLE))
   { // if 1
     // check if query analysis info is available
     if(QueryAnalysis::Instance()->isAnalysisON())
     { // if 2
       // get a handle to the query analysis
       QueryAnalysis* queryAnalysis = QueryAnalysis::Instance();

       // get a handle to the table analysis
       const TableAnalysis * tableAnalysis = getTableAnalysis();

       if(!tableAnalysis)
         return;

       // iterate over statistics for each column
       for(CollIndex i = 0; i < colStats_.entries(); i++)
       { // for 1
         // Get a handle to the column's statistics descriptor
         ColStatDescSharedPtr columnStatDesc = colStats_[i];

         // get a handle to the ColStats
         ColStatsSharedPtr colStats = columnStatDesc->getColStats();

         // if this is a single column, as opposed to a multicolumn
         if(colStats->getStatColumns().entries() == 1)
         { // if 3
           // get column's value id
           const ValueId columnId = columnStatDesc->getColumn();

           // get column analysis
           ColAnalysis* colAnalysis = queryAnalysis->getColAnalysis(columnId);

           if(!colAnalysis) continue;

           ValueIdSet predicatesOnColumn =
             colAnalysis->getReferencingPreds();

           // we can compress this column's histogram if there
           // is a equality predicate against a constant

           ItemExpr *constant = NULL;

           NABoolean colHasEqualityAgainstConst =
             colAnalysis->getConstValue(constant);

           // if a equality predicate with a constant was found
           // i.e. predicate of the form col = 5
           if (colHasEqualityAgainstConst)
           { // if 4
             if (constant)
               // compress the histogram
               columnStatDesc->compressColStatsForQueryPreds(constant,constant);
           } // if 4
           else{ // else 4

             // since there is no equality predicates we might still
             // be able to compress the column's histogram based on
             // range predicates against a constant. Following are
             // examples of such predicates
             // * col > 1 <-- predicate defines a lower bound
             // * col < 3 <-- predicate defines a upper bound
             // * col >1 and col < 30 <-- window predicate, define both bounds
             ItemExpr * lowerBound = NULL;
             ItemExpr * upperBound = NULL;

             // Extract predicates from range spec and add it to the
             // original predicate set otherwise isARangePredicate() will
             // return FALSE, so histgram compression won't happen.
             ValueIdSet rangeSpecPred(predicatesOnColumn);
             for (ValueId predId= rangeSpecPred.init();
                                  rangeSpecPred.next(predId);
                                  rangeSpecPred.advance(predId))
             {
               ItemExpr * pred = predId.getItemExpr();
               if ( pred->getOperatorType() == ITM_RANGE_SPEC_FUNC )
               {
                 ValueIdSet vs;
                 ((RangeSpecRef *)pred)->getValueIdSetForReconsItemExpr(vs);
                 // remove rangespec vid from the original set
                 predicatesOnColumn.remove(predId);
                 // add preds extracted from rangespec to the original set
                 predicatesOnColumn.insert(vs);
               }
             }

             // in the following loop we iterate over all the predicates
             // on this column. If there is a range predicate e.g. a > 2
             // or a < 3, then we use that to define upper and lower bounds.
             // Given predicate a > 2, we get a lower bound of 2.
             // Given predicate a < 3, we get a upper bound of 3.
             // The bound are then passed down to the histogram
             // compression methods.

             // iterate over predicates to see if any of them is a range
             // predicate e.g. a > 2
             for (ValueId predId= predicatesOnColumn.init();
                                  predicatesOnColumn.next(predId);
                                  predicatesOnColumn.advance(predId))
             { // for 2
               // check if this predicate is a range predicate
               ItemExpr * predicateOnColumn = predId.getItemExpr();
               if (predicateOnColumn->isARangePredicate())
               { // if 5

                 // if a predicate is a range predicate we need to find out more
                 // information regarding the predicate to see if it can be used
                 // to compress the columns histogram. We look for the following:
                 // * The predicate is against a constant e.g. a > 3 and not against
                 //   another column e.g. a > b
                 // Also give a predicate we need to find out what side is the column
                 // and what side is the constant. Normally people write a range predicate
                 // as a > 3, but the same could be written as 3 < a.
                 // Also either on of the operands of the range predicate might be
                 // a VEG, if so then we need to dig into the VEG to see where is
                 // the constant and where is the column.

                 // check the right and left children of this predicate to
                 // see if one of them is a constant
                 ItemExpr * leftChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(0);
                 ItemExpr * rightChildItemExpr = (ItemExpr *) predicateOnColumn->getChild(1);

                 // by default assume the literal is at right i.e. predicate of
                 // the form a > 2
                 NABoolean columnAtRight = FALSE;

                 // check if right child of predicate is a VEG
                 if ( rightChildItemExpr->getOperatorType() == ITM_VEG_REFERENCE)
                 { // if 6
                   // if child is a VEG
                   VEGReference * rightChildVEG = (VEGReference *) rightChildItemExpr;

                   // check if the VEG contains the current column
                   // if it does contain the current column then
                   // the predicate has the column on right and potentially
                   // a constant on the left.
                   if(rightChildVEG->getVEG()->getAllValues().contains(columnId))
                   { // if 7
                     // column is at right i.e. predicate is of the form
                     // 2 < a
                     columnAtRight = TRUE;
                   } // if 7
                 } // if 6
                 else{ // else 6
                   // child is not a VEG
                   if ( columnId == rightChildItemExpr->getValueId() )
                   { // if 8
                     // literals are at left i.e. predicate is of the form
                     // (1,2) < (a, b)
                     columnAtRight = TRUE;
                   } // if 8
                 } // else 6

                 ItemExpr * potentialConstantExpr = NULL;

                 // check if the range predicate is against a constant
                 if (columnAtRight)
                 { // if 9
                   // the left child is potentially a constant
                   potentialConstantExpr = leftChildItemExpr;
                 } // if 9
                 else{ // else 9
                   // the right child is potentially a constant
                   potentialConstantExpr = rightChildItemExpr;
                 } // else 9

                 // initialize constant to NULL before
                 // looking for next constant
                 constant = NULL;

                 // check if potentialConstantExpr contains a constant.
                 // we need to see if this range predicate is a predicate
                 // against a constant e.g col > 1 and not a predicate
                 // against another column e.g. col > anothercol

                 // if the expression is a VEG
                 if ( potentialConstantExpr->getOperatorType() == ITM_VEG_REFERENCE)
                 { // if 10

                   // expression is a VEG, dig into the VEG to
                   // get see if it contains a constant
                   VEGReference * potentialConstantExprVEG =
                     (VEGReference *) potentialConstantExpr;

                   potentialConstantExprVEG->getVEG()->\
                     getAllValues().referencesAConstValue(&constant);
                 } // if 10
                 else{ // else 10

                   // express is not a VEG, it is a constant
                   if ( potentialConstantExpr->getOperatorType() == ITM_CONSTANT )
                     constant = potentialConstantExpr;
                 } // else 10

                 // if predicate involves a constant, does the constant imply
                 // a upper bound or lower bound
                 if (constant)
                 { // if 11
                   // if range predicate has column at right e.g. 3 > a
                   if (columnAtRight)
                   { // if 12
                     if ( predicateOnColumn->getOperatorType() == ITM_GREATER ||
                          predicateOnColumn->getOperatorType() == ITM_GREATER_EQ)
                     { // if 13
                       if (!upperBound)
                         upperBound = constant;
                     } // if 13
                     else
                     { // else 13
                       if (!lowerBound)
                         lowerBound = constant;
                     } // else 13
                   } // if 12
                   else{ // else 12
                     // range predicate has column at left e.g. a < 3
                     if ( predicateOnColumn->getOperatorType() == ITM_LESS ||
                          predicateOnColumn->getOperatorType() == ITM_LESS_EQ)
                     { // if 14
                       if (!upperBound)
                         upperBound = constant;
                     } // if 14
                     else
                     { // else 14
                       if (!lowerBound)
                         lowerBound = constant;
                     } // else 14
                   } // else 12
                 } // if 11
               } // if 5
             } // for 2

             // if we found a upper bound or a lower bound
             if (lowerBound || upperBound)
             {
               // compress the histogram based on range predicates
               columnStatDesc->compressColStatsForQueryPreds(lowerBound, upperBound);
             }
           } // else 4
         } // if 3
       } // for 1
     } // if 2
   } // if 1
 	  // All histograms compressed. Set the histCompressed flag to TRUE
 	  histsCompressed(TRUE);
 }

 // -----------------------------------------------------------------------
 // Split a Hive table location string into its components by delegating
 // to HHDFSTableStats::splitLocation().
 //
 // tableLocation    : location string to split (forwarded unchanged)
 // hdfsHost,
 // hdfsPort,
 // tableDir         : forwarded by reference to splitLocation(); presumably
 //                    filled in by it on success - confirm against
 //                    HHDFSTableStats
 // diags            : optional diagnostics area; when non-NULL and the
 //                    split fails with an error recorded, SQL code -1215
 //                    is added with the location and the error message
 // hdfsPortOverride : forwarded unchanged to splitLocation()
 //
 // Returns the value returned by HHDFSTableStats::splitLocation().
 // -----------------------------------------------------------------------
 NABoolean TableDesc::splitHiveLocation(const char *tableLocation,
                                        NAString &hdfsHost,
                                        Int32 &hdfsPort,
                                        NAString &tableDir,
                                        ComDiagsArea *diags,
                                        int hdfsPortOverride)
 {
   // local diagnostics collected by the HDFS layer during the split
   HHDFSDiags splitDiags;

   NABoolean splitOk =
     HHDFSTableStats::splitLocation(tableLocation,
                                    hdfsHost,
                                    hdfsPort,
                                    tableDir,
                                    splitDiags,
                                    hdfsPortOverride);

   if (splitOk)
     {
       // a successful split must not have recorded any error
       CMPASSERT(splitDiags.isSuccess());
       return splitOk;
     }

   // the split failed, so an error is expected in the local diagnostics
   if (splitDiags.isSuccess())
     {
       // failure reported without any error recorded: internal
       // inconsistency
       CMPASSERT(0);
     }
   else if (diags != NULL)
     {
       // pass the failure on to the caller's diagnostics area
       (*diags) << DgSqlCode(-1215)
                << DgString0(tableLocation)
                << DgString1(splitDiags.getErrMsg());
     }

   return splitOk;
 }