core/sql/optimizer/RelSample.cpp - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 /* -*-C++-*-
 ******************************************************************************
 *
 * File:         RelSample.cpp
 * Description:  All the methods of RelSample() and PhysSample().
 *
 * Created:      9/24/98
 * Language:     C++
 *
 *
 ******************************************************************************
 */
 #include "AllItemExpr.h"
 #include "ItemSample.h"
 #include "AllRelExpr.h"
 #include "RelSample.h"
 #include "SchemaDB.h"
 #include "GroupAttr.h"
 #include "BindWA.h"
 #include "NormWA.h"
 #include "Cost.h"
 #include "CostMethod.h"
 #include "opt.h"
 #include "Globals.h"
 #include "SqlParserAux.h"

 // -----------------------------------------------------------------------
 // This file contains all the methods for the class RelSample.
 // This is a departure from the way other RelExpr's are organized.
 //


 ///////////////////////////////////////////////////////////////////////////
 //
 //            RelSample Class
 //
 //
 ///////////////////////////////////////////////////////////////////////////

 RelSample::RelSample(RelExpr *child,
                      SampleTypeEnum sampleType,
                      ItemExpr *balanceExpr,
                      ItemExpr *requiredOrder,
                      CollHeap *oHeap)
   : RelExpr(REL_SAMPLE, child, NULL, oHeap),
     balanceExprTree_(balanceExpr),
     sampleType_(sampleType),
     sampleScanSucceeded_(FALSE),
     requiredOrderTree_(requiredOrder)
 {
   // Builds a REL_SAMPLE node over 'child'. The balance and required-order
   // item expression trees are stored as given; the sample-scan-succeeded
   // flag starts out FALSE.
   setNonCacheable();

   // Without a balance expression there is nothing left to prepare.
   if (balanceExprTree_ == NULL)
     return;

   // Hand the sample type to the balance expression, then let it
   // rearrange its children.
   ItmBalance *balance = (ItmBalance *)balanceExprTree_;
   balance->propagateSampleType(sampleType);
   balance->rearrangeChildren();
 }

 RelSample::~RelSample()
 {
   // No explicit cleanup is performed by this destructor.
 }

 ItemExpr *
 RelSample::removeRequiredOrderTree()
 {
   // Detach the required-order tree: return the current value of
   // requiredOrderTree_ to the caller and leave the member set to NULL.
   ItemExpr *detachedTree = requiredOrderTree_;
   requiredOrderTree_ = NULL;
   return detachedTree;
 }


 Float32 RelSample::getSamplePercent() const
 {
   // Returns the sample size as a fraction (the balance expression's
   // sample constant divided by 100), or the sentinel -1.0 when
   //   - the sample type is neither RANDOM nor CLUSTER,
   //   - no balance expression is available,
   //   - the balance expression is absolute, or
   //   - the balance expression has a next balance.
   if (sampleType() != RANDOM && sampleType() != CLUSTER)
     return -1.0;

   // Prefer the balance tree; otherwise fall back to the first entry
   // of the balance expression set, if there is one.
   ItmBalance *balance = (ItmBalance *)balanceExprTree_;
   if (balance == NULL && NOT balanceExpr().isEmpty())
   {
     ValueId firstId;
     balanceExpr().getFirst(firstId);
     balance = (ItmBalance *)(firstId.getItemExpr());
   }

   if (balance == NULL)
     return -1.0;

   if ((balance->isAbsolute() == TRUE) OR
       (balance->getNextBalance() != NULL))
     return -1.0;

   // The size is specified as a percentage; convert it to a fraction.
   double fraction = balance->getSampleConstValue();
   fraction = fraction / 100;
   return (float)fraction;
 }

 Lng32 RelSample::getClusterSize() const
 {
   // Returns the balance expression's cluster constant converted to Lng32,
   // or the sentinel -1 when the sample type is not CLUSTER, when no
   // balance expression is available, or when the balance expression has
   // a next balance.
   if (sampleType() != CLUSTER)
     return -1;

   // Prefer the balance tree; otherwise fall back to the first entry
   // of the balance expression set, if there is one.
   ItmBalance *balance = (ItmBalance *)balanceExprTree_;
   if (balance == NULL && NOT balanceExpr().isEmpty())
   {
     ValueId firstId;
     balanceExpr().getFirst(firstId);
     balance = (ItmBalance *)(firstId.getItemExpr());
   }

   if (balance == NULL)
     return -1;

   if (balance->getNextBalance() != NULL)
     return -1;

   double clusterSize = balance->getClusterConstValue();
   return (Lng32)clusterSize;
 }

 NABoolean RelSample::isSimpleRandomRelative() const
 {
   // TRUE exactly when getSamplePercent() yields a real value rather than
   // its -1.0 sentinel. Note that getSamplePercent() accepts both RANDOM
   // and CLUSTER sample types, requires a relative (non-absolute) size and
   // rejects a balance expression that has a next balance.
   return (getSamplePercent() == -1.0) ? FALSE : TRUE;
 }

 // RelSample::topHash() --------------------------------------------------
 // Compute a hash value for a chain of derived RelExpr nodes.
 // Used by the Cascades engine as a quick way to determine if
 // two nodes are identical.
 // Can produce false positives (different nodes hash to the same value),
 // but should not produce false negatives (identical nodes must hash alike).
 //
 // Inputs: none (other than 'this')
 //
 // Outputs: A HashValue of this node and all nodes in the
 // derivation chain below (towards the base class) this node.
 //
 HashValue RelSample::topHash()
 {
   // Start from the hash of the base-class part of this node.
   //
   HashValue hash = RelExpr::topHash();

   // Fold in the RelSample-specific state: the sample type, the balance
   // expression, the sampled columns and the required order.
   //
   hash ^= sampleType();
   hash ^= balanceExpr();
   hash ^= sampledColumns();
   hash ^= requiredOrder();

   return hash;
 }

 // RelSample::duplicateMatch()
 // A more thorough method to compare two RelExpr nodes.
 // Used by the Cascades engine when the topHash() of two
 // nodes returns the same hash values.
 //
 // Inputs: other - a reference to another node of the same type.
 //
 // Outputs: NABoolean - TRUE if this node is 'identical' to the
 //          'other' node. FALSE otherwise.
 //
 // In order to match, this node must match all the way down the
 // derivation chain to the RelExpr class.
 //
 NABoolean
 RelSample::duplicateMatch(const RelExpr & other) const
 {
   // Returns TRUE only if 'other' matches this node all the way down the
   // derivation chain and agrees with it on every RelSample field that
   // topHash() folds into the hash value: sample type, balance expression,
   // sampled columns and required order.
   //
   // Compare this node with 'other' down the derivation chain.
   //
   if (!RelExpr::duplicateMatch(other))
     return FALSE;

   // View 'other' as a RelSample node. Only const accessors are needed,
   // so constness is preserved.
   //
   const RelSample &o = static_cast<const RelSample &>(other);

   // The sampling type and the sample size expressions must be the same.
   //
   if (!(sampleType() == o.sampleType()))
     return FALSE;

   if (!(balanceExpr() == o.balanceExpr()))
     return FALSE;

   // The sampled columns take part in topHash(), so they must take part
   // in the match as well; otherwise two nodes whose hash values collide
   // could be declared identical although they sample different columns.
   //
   if (!(sampledColumns() == o.sampledColumns()))
     return FALSE;

   // If the required order keys are not the same
   // then the nodes are not identical
   //
   if (!(requiredOrder() == o.requiredOrder()))
     return FALSE;

   return TRUE;
 }

 // RelSample::copyTopNode ----------------------------------------------
 // Copy a chain of derived nodes (Calls RelExpr::copyTopNode).
 // Needs to copy all relevant fields.
 // Used by the Cascades engine.
 //
 // Inputs: derivedNode - If Non-NULL this should point to a node
 //         which is derived from this node.  If NULL, then this
 //         node is the top of the derivation chain and a node must
 //         be constructed.
 //
 // Outputs: RelExpr * - A Copy of this node.
 //
 // If 'derivedNode' is non-NULL, then this method is being called
 // from a copyTopNode method on a class derived from this one. If it
 // is NULL, then this is the top of the derivation chain and a RelSample
 // node must be constructed.
 //
 // In either case, the relevant data members must be copied to 'derivedNode'
 // and 'derivedNode' is passed to the copyTopNode method of the class
 // below this one in the derivation chain (RelExpr::copyTopNode() in this
 // case).
 //
 RelExpr *
 RelSample::copyTopNode(RelExpr *derivedNode, CollHeap *outHeap)
 {
   // Use the node supplied by a derived class if there is one; otherwise
   // this is the top of the derivation chain and a new RelSample is built.
   RelSample *result = (RelSample *) derivedNode;

   if (result == NULL)
   {
     result = new (outHeap) RelSample(child(0),
                                      sampleType(),
                                      balanceExprTree());
   }

   // Copy the relevant fields.
   result->sampleType_ = sampleType();
   result->sampleScanSucceeded_ = sampleScanSucceeded();

   // The copy gets its own copy of the balance tree, when one exists.
   ItemExpr *sourceTree = balanceExprTree();
   if (sourceTree != NULL)
   {
     result->balanceExprTree() =
       sourceTree->copyTree(outHeap)->castToItemExpr();
   }

   result->balanceExpr() = balanceExpr();
   result->sampledColumns() = sampledColumns();
   result->requiredOrder() = requiredOrder();

   // Copy any data members from the classes lower in the derivation chain.
   //
   return RelExpr::copyTopNode(result, outHeap);
 }


 // RelSample::addLocalExpr() -----------------------------------------------
 // Insert into a list of expressions all the expressions of this node and
 // all nodes below this node in the derivation chain. Insert into a list of
 // names, all the names of the expressions of this node and all nodes below
 // this node in the derivation chain. This method is used by the GUI tool
 // and by the Explain Function to have a common method to get all the
 // expressions associated with a node.
 //
 // Inputs/Outputs: xlist - a list of expressions.
 //                 llist - a list of names of expressions.
 //
 // The xlist contains a list of all the expressions associated with this
 // node. The llist contains the names of these expressions. (These lists
 // must be kept in the same order).
 // RelSample::addLocalExpr potentially adds the sampled columns
 // ("sampled_columns"), the balance expression ("balance_expression")
 // and the required order ("required_order").
 //
 // It then calls RelExpr::addLocalExpr() which will add any RelExpr
 // expressions to the list.
 //
 void RelSample::addLocalExpr(LIST(ExprNode *) &xlist,
                              LIST(NAString) &llist) const
 {
   // Sampled columns, rebuilt into an item list when there are any.
   if (sampledColumns().entries() > 0)
   {
     xlist.insert(sampledColumns().rebuildExprTree(ITM_ITEM_LIST));
     llist.insert("sampled_columns");
   }

   // Balance expression: the tree is used when it is present; otherwise
   // an expression tree is rebuilt from the balance expression set.
   if (balanceExprTree() || balanceExpr().entries() > 0)
   {
     ItemExpr *balanceNode = balanceExprTree_
       ? balanceExprTree_
       : balanceExpr().rebuildExprTree(ITM_ITEM_LIST);

     xlist.insert(balanceNode);
     llist.insert("balance_expression");
   }

   // Required order: again the tree takes precedence over the list.
   if (requiredOrderTree_)
   {
     xlist.insert(requiredOrderTree_);
     llist.insert("required_order");
   }
   else if (requiredOrder().entries() > 0)
   {
     xlist.insert(requiredOrder().rebuildExprTree(ITM_ITEM_LIST));
     llist.insert("required_order");
   }

   // Let the base class append its own expressions.
   RelExpr::addLocalExpr(xlist,llist);
 }

 // RelSample::getPotentialOutputValues() ---------------------------------
 // Construct a Set of the potential outputs of this node.
 //
 // Inputs: none (other than 'this')
 //
 // Outputs: outputValues - a ValueIdSet representing the potential outputs
 //          of this node.
 //
 // The potential outputs for the RelSample node are the new columns
 // generated by the RelSample node. The new columns generated by RelSample
 // node are "Sampled" versions of all the potential outputs of its child.
 //
 void
 RelSample::getPotentialOutputValues(ValueIdSet & outputValues) const
 {
   // Adds the sampled columns to 'outputValues'. Whatever the caller
   // already placed in the set is kept (the set is extended, not reset).
   outputValues += sampledColumns();
 }

 // RelSample::pushdownCoveredExpr() ------------------------------------
 //
 // In order to compute the Group Attributes for a relational operator
 // an analysis of all the scalar expressions associated with it is
 // performed. The purpose of this analysis is to identify the sources
 // of the values that each expression requires. As a result of this
 // analysis values are categorized as external dataflow inputs or
 // those that can be produced completely by a certain child of the
 // relational operator.
 //
 // This method is invoked on each relational operator. It causes
 // a) the pushdown of predicates and
 // b) the recomputation of the Group Attributes of each child.
 //    The recomputation is required either because the child is
 //    assigned new predicates or is expected to compute some of the
 //    expressions that are required by its parent.
 //
 // For the sample operator, only the balance expression contains references
 // to any outputs produced by the child. These expressions refer to the unsampled
 // columns and therefore can be pushed down. However, the predicatesOnParent if
 // they exist refer to the sampled columns and therefore they will not be pushed
 // down.
 // ---------------------------------------------------------------------

 void
 RelSample::pushdownCoveredExpr(const ValueIdSet &outputExpr,
                                const ValueIdSet &newExternalInputs,
                                ValueIdSet &predicatesOnParent,
 			       const ValueIdSet *setOfValuesReqdByParent,
                                Lng32 childIndex
                               )
 {
   // Prunes sampledColumns() down to the columns that are still referenced,
   // then delegates to RelExpr::pushdownCoveredExpr() with the remaining
   // sampled columns as the output set and the balance expression plus the
   // required order as the values required by the parent.
   //
   // Start with everything the parent needs: the optional set of required
   // values plus the output expressions.
   ValueIdSet exprOnParent;
   if (setOfValuesReqdByParent)
     exprOnParent = *setOfValuesReqdByParent;
   exprOnParent += outputExpr;
   ValueId refVal;
   ValueIdSet outputSet;

   // Prune from the sampledColumns() ValueIdSet, those expressions
   // that are not needed above (in setOfValuesReqdByParent) or by
   // the selectionPred.
   //
   // NOTE(review): entries are removed from sampledColumns() while that
   // same set is being iterated - confirm that ValueIdSet iteration
   // tolerates removal of the current element.
   //
   for(ValueId sampleCol = sampledColumns().init(); sampledColumns().next(sampleCol);
   sampledColumns().advance(sampleCol)) {
     if(!exprOnParent.referencesTheGivenValue(sampleCol, refVal) &&
       !selectionPred().referencesTheGivenValue(sampleCol, refVal)) {
       sampledColumns() -= sampleCol;
     }
   }

   // Remove all expressions from exprOnParent.  They
   // can't be pushed down!
   //
   exprOnParent.clear();

   // Add all the values required for the Sample expressions
   // to the values required by the parent.  These expressions
   // can't be pushed down either, but attempting to push them
   // down causes the child node to provide the values needed.
   //
   outputSet += sampledColumns();

   exprOnParent += balanceExpr();

   exprOnParent.insertList(requiredOrder());

   // Delegate to the base class with the rebuilt output and required sets.
   RelExpr::pushdownCoveredExpr(outputSet,
                                newExternalInputs,
                                predicatesOnParent,
 			       &exprOnParent,
                                childIndex);

 } // RelSample::pushdownCoveredExpr

 Context* RelSample::createContextForAChild(Context* myContext,
                                            PlanWorkSpace* pws,
                                            Lng32& childIndex)
 {
   // Produces the optimization Context for the single child of this node,
   // or NULL when a context was already generated, when broadcast
   // replication is required, or when the combined requirements are not
   // feasible. On the paths that get past the first check, childIndex is
   // set to 0.
   //
   // ---------------------------------------------------------------------
   // If one Context has been generated for each child, return NULL
   // to signal completion.
   // ---------------------------------------------------------------------
   if (pws->getCountOfChildContexts() == getArity())
     return NULL;

   childIndex = 0;

   Lng32 planNumber = 0;
   const ReqdPhysicalProperty* rppForMe = myContext->getReqdPhysicalProperty();
   PartitioningRequirement* partReqForMe =
     rppForMe->getPartitioningRequirement();

   // If a partitioning requirement exists and it requires broadcast
   // replication, then return NULL now. Only an exchange operator
   // can satisfy a broadcast replication partitioning requirement.
   if ((partReqForMe != NULL) AND
       partReqForMe->isRequirementReplicateViaBroadcast())
     return NULL;

   // Start from the requirements placed on this node and adjust them
   // for the child.
   RequirementGenerator rg(child(0),rppForMe);

   // ---------------------------------------------------------------------
   // Add the order requirements needed for this RelSample node
   // ---------------------------------------------------------------------

   // Remove any sort order requirement from parent.
   //
   rg.removeSortKey();
   rg.removeArrangement();
   rg.removeSortOrderTypeReq();

   // Shouldn't/Can't add a sort order type requirement
   // if we are in DP2
   if (rppForMe->executeInDP2())
     rg.addSortKey(requiredOrder(),NO_SOT);
   else
     rg.addSortKey(requiredOrder(),ESP_SOT);

   // Can not execute absolute sampling in parallel: anything for which
   // isSimpleRandomRelative() is false is restricted to one partition.
   //
   if(!isSimpleRandomRelative())
     rg.addNumOfPartitions(1);

   // ---------------------------------------------------------------------
   // Done adding all the requirements together, now see whether it worked
   // and give up if it is not possible to satisfy them
   // ---------------------------------------------------------------------
   if (NOT rg.checkFeasibility())
     return NULL;

   // ---------------------------------------------------------------------
   // Compute the cost limit to be applied to the child.
   // ---------------------------------------------------------------------
   CostLimit* costLimit = computeCostLimit(myContext, pws);

   // ---------------------------------------------------------------------
   // Get a Context for optimizing the child.
   // Search for an existing Context in the CascadesGroup to which the
   // child belongs that requires the same properties as those produced
   // by rg.produceRequirement(). Reuse it, if found. Otherwise, create a
   // new Context that contains those required physical properties.
   // ---------------------------------------------------------------------
   Context* result = shareContext(childIndex,
                                  rg.produceRequirement(),
                                  myContext->getInputPhysicalProperty(),
                                  costLimit,
                                  myContext,
                                  myContext->getInputLogProp());

   // ---------------------------------------------------------------------
   // Store the Context for the child in the PlanWorkSpace.
   // ---------------------------------------------------------------------
   pws->storeChildContext(childIndex, planNumber, result);

   return result;

 } // RelSample::createContextForAChild()

 // RelSample::removeBalanceExprTree() -------------------------------------
 // Return the balanceExprTree_ ItemExpr tree and set it to NULL.
 //
 // Inputs: none (Other than 'this')
 //
 // Outputs: ItemExpr * - the value of balanceExprTree_
 //
 // Side Effects: Sets the value of balanceExprTree_ to NULL.
 //
 // Called by RelSample::bindNode(). The value of balanceExprTree_ is not
 // needed after the binder.
 //
 ItemExpr *
 RelSample::removeBalanceExprTree()
 {
   // Detach the balance tree: return its current value to the caller
   // and leave balanceExprTree_ set to NULL.
   ItemExpr *detachedTree = balanceExprTree();

   balanceExprTree_ = NULL;

   return detachedTree;
 }

 // RelSample::transformNode() -------------------------------------------
 // Unconditional query transformations such as the transformation of
 // a subquery to a semijoin are implemented by the virtual function
 // transformNode(). The aim of such transformations is to bring the
 // query tree to a canonical form. transformNode() also ensures
 // that the "required" (or characteristic) input values are "minimal"
 // and the "required" (or characteristic) outputs values are
 // "maximal" for each operator.
 //
 // transformNode() is an overloaded name, which is used for a set
 // of methods that implement the transformation phase of query
 // normalization.
 //
 // We use the term query tree for a tree of relational operators,
 // each of which can contain none or more scalar expression trees.
 // The transformations performed by transformNode() brings scalar
 // expressions into a canonical form. The effect of most such
 // transformations is local to the scalar expression tree.
 // However, the transformation of a subquery requires a semijoin
 // to be performed between the relational operator that contains
 // the subquery and the query tree for the subquery. The effect
 // of such a subquery transformation is therefore visible not
 // only in the scalar expression tree but also in the relational
 // expression tree.
 //
 // Parameters:
 //
 // NormWA & normWARef
 //    IN : a pointer to the normalizer work area
 //
 // ExprGroupId & locationOfPointerToMe
 //    IN : a reference to the location that contains a pointer to
 //         the RelExpr that is currently being processed.
 //
 void RelSample::transformNode(NormWA &normWARef,
                               ExprGroupId &locationOfPointerToMe)
 {
   // Transforms the child, the balance expression and the required order
   // inside a VEG region allocated for this node, then (back in the
   // original region) recomputes the child's outer references and
   // transforms the selection predicates.
   CMPASSERT( this == locationOfPointerToMe );

   // If this node has already been transformed, we are done.
   //
   if (nodeIsTransformed())
     return;

   // Make sure that it is only transformed once.
   //
   markAsTransformed();

   // The Sample node does not pull up predicates, so the equality
   // predicates below this node are not true above this node.
   // Create a new VEGRegion when transforming the child.
   normWARef.allocateAndSetVEGRegion(IMPORT_AND_EXPORT,this);

   // transformNode takes up a bound tree and turns into a transformed
   // tree. For a RelExpr that means the following.
   //    + expressions are transformed. If the expressions contain
   //        subqueries then new RelExpr are created for them and
   //        they are usually added above (as an ancestor) of the node
   //        that contained them.
   //    + predicates are pulled up from the children and their
   //        required inputs are modified
   //    + the required inputs of the node itself are changed from
   //        being a sufficient set to being a sufficient minimal set.
   //
   // Transform the child.
   // Pull up their transformed predicates
   // recompute their required inputs.
   //
   child(0)->transformNode(normWARef, child(0));

   // Transform the balance expression. A true result means the child
   // was replaced by a new tree, which must be transformed as well.
   if(balanceExpr().transformNode(normWARef,
     child(0),
     getGroupAttr()->getCharacteristicInputs()))
   {
     // -----------------------------------------------------------------
     // Transform my new child
     // -----------------------------------------------------------------
     child(0)->transformNode(normWARef, child(0));
   }

   // Same treatment for the required order list.
   if(requiredOrder().
      transformNode(normWARef,
                    child(0),
                    getGroupAttr()->getCharacteristicInputs())) {

     // The requiredOrder list apparently had some subqueries that had
     // not been processed before (is this possible?). Normalize the
     // new tree that has become our child.
     //
     child(0)->transformNode(normWARef, child(0));
   }

   // Leave the VEG region that was set for this node.
   normWARef.restoreOriginalVEGRegion();

   // RelSample::pullUpPreds() does not pull up any predicates; it only
   // makes the child recompute its outer references.
   //
   pullUpPreds();


   // transform the selection predicates
   //
   transformSelectPred(normWARef, locationOfPointerToMe);

 } // RelSample::transformNode()

 // RelSample::rewriteNode() ---------------------------------------------
 // rewriteNode() is the virtual function that computes
 // the transitive closure for "=" predicates and rewrites value
 // expressions.
 //
 // Parameters:
 //
 // NormWA & normWARef
 //    IN : a pointer to the normalizer work area
 //
 void RelSample::rewriteNode(NormWA & normWARef)
 {
   // Rewrites the child, the balance expression and the required order
   // inside this node's VEG region; the selection predicates and the
   // group attributes are rewritten after the original region has been
   // restored.

   // locate the VEGRegion that was created for this node during transformation
   normWARef.locateAndSetVEGRegion(this);

   child(0)->rewriteNode(normWARef);

   balanceExpr().normalizeNode(normWARef);

   requiredOrder().normalizeNode(normWARef);

   // Back to the region that was current before this node's region was set.
   normWARef.restoreOriginalVEGRegion();

   selectionPred().normalizeNode(normWARef);

   // rewrite expression in the group attributes
   getGroupAttr()->normalizeInputsAndOutputs(normWARef);

 } // RelSample::rewriteNode()


 RelExpr * RelSample::normalizeNode(NormWA & normWARef)
 {
   // Normalizes this node at most once; this node is always returned.
   if (NOT nodeIsNormalized())
   {
     markAsNormalized();

     // Push down what the child can cover, using this node's
     // characteristic outputs and inputs and its selection predicates.
     pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
                         getGroupAttr()->getCharacteristicInputs(),
                         selectionPred());

     // Normalize the child inside the VEGRegion that was created for
     // this node during transformation, then switch back.
     normWARef.locateAndSetVEGRegion(this);
     child(0) = child(0)->normalizeNode(normWARef);
     normWARef.restoreOriginalVEGRegion();

     fixEssentialCharacteristicOutputs();
   }

   return this;
 }


 // RelSample::pullUpPreds() --------------------------------------------
 // is redefined to disallow the pullup of predicates
 // from the operator's child. The outputs of the sample operator
 // are "sampled" versions of the outputs of its child.
 //
 void RelSample::pullUpPreds()
 {
   // ---------------------------------------------------------------------
   // Simply don't pull up child's selection predicates. Still need to tell
   // child to recompute its outer references due to the warning below.
   // This call is the only action this method performs.
   // ---------------------------------------------------------------------
   child(0)->recomputeOuterReferences();

   // ---------------------------------------------------------------------
   // WARNING: One rule that this procedure must follow is
   // that recomputeOuterReferences() must be called on the children even
   // if no predicates are pulled up from them. This is to correct
   // the outer references that are added to a right child of a
   // semi or outer join when processing subqueries in the ON clause.
   // ---------------------------------------------------------------------
 }

 // RelSample::recomputeOuterReferences() --------------------------------
 // This method is used by the normalizer for recomputing the
 // outer references (external dataflow input values) that are
 // still referenced by each operator in the subquery tree
 // after the predicate pull up is complete.
 //
 // Side Effects: sets the characteristicInputs of the groupAttr.
 //
 void RelSample::recomputeOuterReferences()
 {
   // This is virtual method on RelExpr.
   // When this is called it is assumed that the children have already
   // been transformed.
   // The required inputs of the child are therefore already minimal
   // and sufficient.
   // It is also assumed that the RelExpr itself has been bound.
   // That implies that the group attributes have already been allocated
   // and the required inputs is a sufficient (but not necessarilly minimum)
   // set of external values needed to evaluate all expressions in this subtree.
   //
   // Delete all those input values that are no longer referenced on
   // this operator because the predicates that reference them have
   // been pulled up.
   //
   ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();

   // The set of valueIds need by this node.
   //
   ValueIdSet allMyExpr(getSelectionPred());

   allMyExpr += balanceExpr();

   allMyExpr.insertList(requiredOrder());


   // Remove from outerRefs those valueIds that are not needed
   // by all my expressions
   //
   allMyExpr.weedOutUnreferenced(outerRefs);

   // Add to outerRefs those that my children need.
   //
   outerRefs += child(0).getPtr()->getGroupAttr()->getCharacteristicInputs();

   // set my Character Inputs to this new minimal set.
   //
   getGroupAttr()->setCharacteristicInputs(outerRefs);
 } // RelSample::recomputeOuterReferences()


 CostScalar RelSample::computeResultSize(const CostScalar &childCardinality)
 {
   // Estimates the row count produced by this node for the given child
   // cardinality by delegating to the root of the balance expression.
   // Exactly one entry is expected in the balance expression set.
   CMPASSERT(balanceExpr().entries() == 1);

   ValueId rootId;
   balanceExpr().getFirst(rootId);

   ItmBalance *rootBalance = (ItmBalance *)rootId.getItemExpr();

   return rootBalance->computeResultSize(childCardinality);
 }


 // RelSample::synthEstLogProp() ------------------------------------------
 // synthesize estimated logical properties given a specific set of
 // input log. properties.
 //
 // Parameters:
 //
 // EstLogPropSharedPtr inputEstLogProp
 //    IN : A set of input logical properties used to estimate the logical
 //         properties of this node.
 //
 void RelSample::synthEstLogProp(const EstLogPropSharedPtr& inputEstLogProp)
 {
   if (getGroupAttr()->isPropSynthesized(inputEstLogProp) == TRUE)
     return;

   // Get the estimated logical properties of the child. To be used
   // to estimate the logical properties of this node.
   //
   EstLogPropSharedPtr childEstProp  = child(0).outputLogProp(inputEstLogProp);
   const ColStatDescList &childColStats = childEstProp->getColStats();

   CostScalar rowCount =
     computeResultSize(childEstProp->getResultCardinality());

   for(CollIndex i = 0; i < childColStats.entries(); i++) {
     ColStatDescSharedPtr columnStatDesc = childColStats[i];
     CostScalar oldCount = columnStatDesc->getColStats()->getRowcount();

     if (oldCount != rowCount)
       columnStatDesc->synchronizeStats(oldCount, rowCount);
   }

   EstLogPropSharedPtr myEstProps =
     synthEstLogPropForUnaryLeafOp(inputEstLogProp,
                                   childColStats,
                                   rowCount);

   // Set the logical properties of this node.
   //
   getGroupAttr()->addInputOutputLogProp(inputEstLogProp, myEstProps);

 } // RelSample::synthEstLogProp


 // RelSample::synthLogProp ----------------------------------------------
 // synthesize logical properties
 //
 void
 RelSample::synthLogProp(NormWA * normWAPtr)
 {
   // Skip the work when properties have already been synthesized.
   if (getGroupAttr()->existsLogExprForSynthesis())
     return;

   RelExpr::synthLogProp(normWAPtr);

   // Collect the child's constraints, leaving out the two kinds of
   // ref-opt constraints; those are offered separately below.
   ValueIdSet nonRIConstraints;
   for (ValueId constraintId = child(0).getGroupAttr()->getConstraints().init();
        child(0).getGroupAttr()->getConstraints().next(constraintId);
        child(0).getGroupAttr()->getConstraints().advance(constraintId))
   {
     if (constraintId.getItemExpr()->getOperatorType() ==
         ITM_COMP_REF_OPT_CONSTRAINT)
       continue;

     if (constraintId.getItemExpr()->getOperatorType() ==
         ITM_REF_OPT_CONSTRAINT)
       continue;

     nonRIConstraints += constraintId;
   }

   getGroupAttr()->addConstraints(nonRIConstraints);

   // Offer the child's full constraint set for the ref-opt constraints.
   getGroupAttr()->addSuitableRefOptConstraints
     (child(0).getGroupAttr()->getConstraints());

 } // RelSample::synthLogProp()


 // RelSample::bindNode - Bind the RelSample node.
 // This node is generated by the parser when it encounters a SAMPLE
 // clause. This node has two item expressions:
 //
 // sizeExprTree(): This expression contains either a simple size
 // expression containing a size, type and normal/oversampling field
 // or a tree of IfThenElse nodes each with a predicate on a column
 // and a simple size expression followed by an optional else part.
 //
 // skipPeriodTree(): This is a simple size expression with no reference
 // to any inputs or outputs of this node or its child. The reason to make
 // this into an expression is only to reuse the SampleSize ItemExpr.
 // However, this "expression" is really not an expression and hence need not
 // treated as one (e.g., there is no need to bind, normalize, etc. Similarly
 // no need to check for outer references, etc).
 //
 RelExpr *RelSample::bindNode(BindWA *bindWA)
 {
   // Overview of the steps performed below:
   //   1. Bind the child.
   //   2. For a simple random relative sample (rate <= 100%) whose child is
   //      an HBase Scan, push all or part of the sampling into the Scan.
   //   3. Bind the required order and the balance expression tree.
   //   4. If no balance node carries an ITM_RETURN_TRUE predicate, OR the
   //      balance predicates together and add them as a selection predicate.
   //   5. Build a RETDesc in which every child column (system and user) is
   //      wrapped in a NotCovered node, and record those in sampledColumns().
   //   6. Bind the base class via bindSelf().
   //
   // Returns this node, or the child Scan when the sampling was pushed down
   // entirely. On a bind error the error status is left set in bindWA and
   // this node is returned.

   // If this node has already been bound, we are done.
   //
   if (nodeIsBound())
     return this;

   // Bind the child nodes.
   //
   bindChildren(bindWA);
   if (bindWA->errStatus())
     return this;


   // If this is a random sample on an HBase table, push the sampling down into
   // the Scan node and remove the Sample node from the tree. For HBase, we
   // perform sampling via a row filter on the HBase side.
   //
   // Avoid pushdown for oversampling (sampling rate > 100%); the HBase filter
   // we use can not return >1 copy of a row.
   //
   // For very low sampling rates, a significant amount of time could be spent in
   // HBase before returning anything to Trafodion, with the risk of getting a
   // scanner timeout exception. This is addressed on the HBase side by reducing
   // the scan's cache size (the term they use for a set of rows combined into a
   // single return). In extreme cases, the expected interval between returns may
   // still be too great even when the cache size is set to the minimum prescribed
   // by the HBASE_NUM_CACHE_ROWS_MIN cqd. For these cases, we divide the sampling
   // between HBase and Trafodion, doing as much as possible in HBase without risking
   // timeout.
   //
   Float32 trafSampleRate = getSamplePercent();
   RelExpr* myChild = child(0);
   if (myChild->getOperatorType() == REL_SCAN &&
       (static_cast<Scan*>(myChild))->isHbaseTable() &&
       isSimpleRandomRelative() &&
       trafSampleRate <= 1.0f)
     {
       Scan* scanChild = static_cast<Scan*>(myChild);
       ULng32 returnInterval =
           ActiveSchemaDB()->getDefaults().getAsULong(USTAT_HBASE_SAMPLE_RETURN_INTERVAL);
       Lng32 cacheMin = CmpCommon::getDefaultNumeric(HBASE_NUM_CACHE_ROWS_MIN);

       // Lowest rate handed to HBase; below this the remainder of the
       // sampling is done by this Sample node.
       const Float32 hbaseMinRate = cacheMin / (Float32)returnInterval;
       if (trafSampleRate < hbaseMinRate)
         {
           // Split the sampling: HBase samples at hbaseSampleRate and this
           // node samples the result at (requested rate / hbaseSampleRate).
           Float32 hbaseSampleRate = hbaseMinRate;
           trafSampleRate /= hbaseSampleRate;

           // The parser function literalOfNumericWithScale() is used to get the
           // correct form of the ConstValue (a fixed numeric) required for the
           // ITM_BALANCE sample percentage operand. That function expects a heap-
           // allocated NAString (which it deletes) containing the percentage in
           // text form.
           static const int BUF_SIZE = 20;
           char buf[BUF_SIZE];
           snprintf(buf, BUF_SIZE, "%f", trafSampleRate * 100);  // Express as a percentage
           NAString* percentStrPtr = new(STMTHEAP) NAString(buf, STMTHEAP);
           ExprNode* oldConst = balanceExprTree_->getChild(1);
           balanceExprTree_->setChild(1, literalOfNumericWithScale(percentStrPtr, '+'));
           delete oldConst;
           scanChild->samplePercent(hbaseSampleRate);
         }
       else
         {
           // All of the sampling is done by the Scan; this Sample node is
           // dropped from the tree by returning the child in its place.
           scanChild->samplePercent(trafSampleRate);
           return myChild;
         }
     }

   ItemExpr *requiredOrderTree = removeRequiredOrderTree();

   if(requiredOrderTree) {
     // The order-by flag is raised only while the required order is converted.
     bindWA->getCurrentScope()->context()->inOrderBy() = TRUE;
     requiredOrderTree->convertToValueIdList(requiredOrder(),
                                             bindWA,
                                             ITM_ITEM_LIST);
     bindWA->getCurrentScope()->context()->inOrderBy() = FALSE;
     if(bindWA->errStatus())
       return this;
   }

   // Bind the balanceExprTree. This expression may contain a tree
   // of Balance expressions.
   //
   // The tree pointer is checked before it is used so that a node without
   // a balance expression is skipped instead of dereferencing NULL.
   //
   ItemExpr *balanceTree = removeBalanceExprTree();
   ItemExpr *boundBalanceExpr =
     (balanceTree != NULL) ? balanceTree->bindNode(bindWA) : NULL;

   if (bindWA->errStatus())
     return this;

   if (boundBalanceExpr != NULL) {
     balanceExpr().insert(boundBalanceExpr->getValueId());

     if(((ItmBalance *)boundBalanceExpr)->checkErrors()) {
       bindWA->setErrStatus();
       return this;
     }
   }

   // Generate the selection predicate from the balance expression only
   // if there is a balance expression tree and there is no else clause.
   //
   NABoolean hasReturnTrue = FALSE;
   ItmBalance * nextBalanceNode = (ItmBalance *)boundBalanceExpr;

   while ((nextBalanceNode != NULL) AND (hasReturnTrue == FALSE))
   {
     if (nextBalanceNode->getPredicate()->getOperatorType() == ITM_RETURN_TRUE)
       hasReturnTrue = TRUE;
     nextBalanceNode = (ItmBalance *)nextBalanceNode->getNextBalance();
   }

   if (hasReturnTrue == FALSE)
   {
     nextBalanceNode = (ItmBalance *)boundBalanceExpr;
     ItemExpr *pred;
     ItemExpr *orexpr = NULL;

     // OR together the predicates of all balance nodes in the chain.
     while (nextBalanceNode != NULL)
     {
       pred = nextBalanceNode->getPredicate();

       if (orexpr != NULL)
         orexpr = new (CmpCommon::statementHeap()) BiLogic(ITM_OR, orexpr, pred);
       else
         orexpr = pred;

       nextBalanceNode = (ItmBalance *)nextBalanceNode->getNextBalance();
     }

     // Now synthesize type and add the predicate, but only when one was
     // actually built (there is nothing to add without a balance tree).
     if (orexpr)
     {
       orexpr->synthTypeAndValueId(TRUE);  // redrive Type Synthesis
       addSelPredTree(orexpr);
     }
   }

   // Construct the RETDesc for this node.
   //
   RETDesc *resultTable = new(bindWA->wHeap()) RETDesc(bindWA);

   // Add the columns from the child to the RETDesc.
   //
   const RETDesc &childTable = *child(0)->getRETDesc();

   const ColumnDescList *sysColList = childTable.getSystemColumnList();

   // System columns first: each is wrapped in a NotCovered node and the
   // wrapper's ValueId is what this node exposes.
   CollIndex i = 0;
   for(i = 0; i < sysColList->entries(); i++)
   {
     ValueId columnValueId = sysColList->at(i)->getValueId();
     ItemExpr *newColumn = new (bindWA->wHeap())
       NotCovered (columnValueId.getItemExpr());
     newColumn->synthTypeAndValueId();

     resultTable->addColumn(bindWA,
       sysColList->at(i)->getColRefNameObj(),
       newColumn->getValueId(),
       SYSTEM_COLUMN,
       sysColList->at(i)->getHeading());
     sampledColumns() += newColumn->getValueId();
   }

   // Then the user columns, wrapped the same way.
   for(i = 0; i < childTable.getDegree(); i++)
   {
     ValueId columnValueId = childTable.getValueId(i);
     ItemExpr *newColumn = new (bindWA->wHeap())
       NotCovered (columnValueId.getItemExpr());
     newColumn->synthTypeAndValueId();

     resultTable->addColumn(bindWA,
       childTable.getColRefNameObj(i),
       newColumn->getValueId(),
       USER_COLUMN,
       childTable.getHeading(i));
     sampledColumns() += newColumn->getValueId();
   }

   // Set the return descriptor
   //
   setRETDesc(resultTable);
   bindWA->getCurrentScope()->setRETDesc(resultTable);

   //
   // Bind the base class.
   //
   return bindSelf(bindWA);
 } // RelSample::bindNode()

 // -----------------------------------------------------------------------
 // RelSample::semanticQueryOptimizeNode()
 // This instance of the SQO virtual method is the same as the base class
 // implementation except that it also keeps track of which
 // VEGRegion we are currently in.
 // -----------------------------------------------------------------------
 RelExpr * RelSample::semanticQueryOptimizeNode(NormWA & normWARef)
 {
   // Do the work only on the first visit; later visits just return this node.
   if (!nodeIsSemanticQueryOptimized())
   {
     markAsSemanticQueryOptimized();

     // The child is processed with this node's VEGRegion set as current;
     // the original region is restored afterwards.
     normWARef.locateAndSetVEGRegion(this);

     // -------------------------------------------------------------------
     // UnNest the child.
     // -------------------------------------------------------------------
     RelExpr *optimizedChild = child(0)->semanticQueryOptimizeNode(normWARef);
     child(0) = optimizedChild;

     normWARef.restoreOriginalVEGRegion();
   }

   return this;

 } // RelSample::semanticQueryOptimizeNode()


 /////////////////////////////////////////////////////////////////////////////////////
 //
 // Methods of the PhysSample
 //
 /////////////////////////////////////////////////////////////////////////////////////

 // Destructor: the body is empty, no work is done here.
 PhysSample::~PhysSample()
 {
 }


 // Copy the top node. When no derived node is supplied a new PhysSample is
 // allocated on oHeap; otherwise the supplied node is used as the target.
 // The rest of the copy is delegated to RelSample::copyTopNode().
 RelExpr *
 PhysSample::copyTopNode(RelExpr *derivedNode, CollHeap *oHeap)
 {
   PhysSample *target = (derivedNode != NULL)
     ? (PhysSample *)derivedNode
     : new (oHeap) PhysSample();

   return RelSample::copyTopNode(target, oHeap);
 }


 // PhysSample::costMethod()
 // Obtain a pointer to a CostMethod object providing access
 // to the cost estimation functions for nodes of this type.
 CostMethod*
 PhysSample::costMethod() const
 {
   // The CostMethodSample object is created on first use and the same
   // pointer is returned by every later call.
   static THREAD_P CostMethodSample *sampleCostMethod = NULL;

   if (sampleCostMethod != NULL)
     return sampleCostMethod;

   sampleCostMethod = new (GetCliGlobals()->exCollHeap())  CostMethodSample();
   return sampleCostMethod;
 } // PhysSample::costMethod()


 // Translate a sort key through a map built from sampledColumns().
 // Every sampled column is asserted to be an ITM_NOTCOVERED node and is
 // paired with the ValueId of its child(0). The given sortKey is then
 // run through that map with mapValueIdListUp() and the result returned.
 ValueIdList
 RelSample::mapSortKey(const ValueIdList &sortKey) const
 {
   ValueIdMap wrapperToChildMap;

   ValueId wrapperId = sampledColumns().init();
   while (sampledColumns().next(wrapperId))
   {
     ItemExpr *wrapperExpr = wrapperId.getItemExpr();

     CMPASSERT(wrapperExpr->getOperatorType() == ITM_NOTCOVERED);

     wrapperToChildMap.addMapEntry(wrapperId,
                                   wrapperExpr->child(0)->getValueId());

     sampledColumns().advance(wrapperId);
   }

   ValueIdList mappedKey;
   wrapperToChildMap.mapValueIdListUp(mappedKey, sortKey);

   return mappedKey;
 }

 // Synthesize the physical property of this node from that of its child's
 // solution. The new property is built from the child's property, with the
 // sort key replaced by mapSortKey() of the child's sort key. The sort order
 // type and the DP2 sort order partitioning function are taken from the
 // child unchanged. The result is allocated on the statement heap.
 PhysicalProperty *
 PhysSample::synthPhysicalProperty(const Context *context,
                                   const Lng32 pn,
                                   PlanWorkSpace  *pws)
 {
   const PhysicalProperty * const childSpp =
     context->getPhysicalPropertyOfSolutionForChild(0);

   // Sort key of the child, translated by mapSortKey().
   const ValueIdList sampleSortKey = mapSortKey(childSpp->getSortKey());

   // for now, simply propagate the physical property
   return new(CmpCommon::statementHeap())
     PhysicalProperty(*childSpp,
                      sampleSortKey,
                      childSpp->getSortOrderType(),
                      childSpp->getDp2SortOrderPartFunc());
 } //  PhysSample::synthPhysicalProperty()