blob: 92f318a91ea593d84a48e8c8de951b7169b6281e [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: RelSample.cpp
* Description: All the methods of RelSample() and PhysSample().
*
* Created: 9/24/98
* Language: C++
*
*
******************************************************************************
*/
#include "AllItemExpr.h"
#include "ItemSample.h"
#include "AllRelExpr.h"
#include "RelSample.h"
#include "SchemaDB.h"
#include "GroupAttr.h"
#include "BindWA.h"
#include "NormWA.h"
#include "Cost.h"
#include "CostMethod.h"
#include "opt.h"
#include "Globals.h"
#include "SqlParserAux.h"
// -----------------------------------------------------------------------
// This file contains all the methods for the class RelSample.
// This is a departure from the way other RelExpr's are organized.
//
///////////////////////////////////////////////////////////////////////////
//
// RelSample Class
//
//
///////////////////////////////////////////////////////////////////////////
// Construct a RelSample operator on top of 'child'.
//
//   child         - relational expression whose rows are sampled
//   sampleType    - kind of sampling; stored and pushed into the balance tree
//   balanceExpr   - parse tree describing the sample size; may be NULL.
//                   It is cast to ItmBalance here, so a non-NULL value
//                   must really be an ItmBalance.
//   requiredOrder - parse tree for the order required by the sample
//   oHeap         - heap forwarded to the RelExpr base class
//
// The node is always marked non-cacheable.
RelSample::RelSample(RelExpr *child,
                     SampleTypeEnum sampleType,
                     ItemExpr *balanceExpr,
                     ItemExpr *requiredOrder,
                     CollHeap *oHeap)
  : RelExpr(REL_SAMPLE, child, NULL, oHeap),
    balanceExprTree_(balanceExpr),
    sampleType_(sampleType),
    sampleScanSucceeded_(FALSE),
    requiredOrderTree_(requiredOrder)
{
  setNonCacheable();

  // Without a balance tree there is nothing left to prepare.
  if (balanceExprTree_ == NULL)
    return;

  // Hand the sample type to the balance tree first, then let the tree
  // rearrange its children.
  ItmBalance *balanceRoot = (ItmBalance *)balanceExprTree_;
  balanceRoot->propagateSampleType(sampleType);
  balanceRoot->rearrangeChildren();
}
// Destructor. The body is empty: this class does no explicit cleanup of
// the expression trees it points to.
RelSample::~RelSample()
{
}
// Detach the required-order parse tree from this node.
// Returns the tree that was stored (possibly NULL); afterwards
// requiredOrderTree_ is NULL.
ItemExpr *
RelSample::removeRequiredOrderTree()
{
  ItemExpr *const detachedTree = requiredOrderTree_;
  requiredOrderTree_ = NULL;
  return detachedTree;
}
// Return the sample size as a fraction (the balance expression's constant
// is a percentage, so it is divided by 100).
//
// Returns -1.0 when the answer is not meaningful:
//   - the sample type is neither RANDOM nor CLUSTER,
//   - no balance expression is available (neither as a parse tree nor
//     as a bound ValueId),
//   - the size is absolute rather than relative, or
//   - there is more than one balance node (getNextBalance() != NULL).
Float32 RelSample::getSamplePercent() const
{
  if (!(sampleType() == RANDOM || sampleType() == CLUSTER))
    return -1.0;

  // Prefer the unbound parse tree; fall back to the first bound
  // balance expression when the tree is no longer attached.
  ItmBalance *balance = (ItmBalance *)balanceExprTree_;
  if (balance == NULL && !balanceExpr().isEmpty())
    {
      ValueId firstId;
      balanceExpr().getFirst(firstId);
      balance = (ItmBalance *)(firstId.getItemExpr());
    }

  if (balance == NULL)
    return -1.0;

  if ((balance->isAbsolute() == TRUE) ||
      (balance->getNextBalance() != NULL))
    return -1.0;

  // The size is specified as a percent.
  const double percent = balance->getSampleConstValue();
  return (float)(percent / 100);
}
// Return the cluster size constant of the balance expression, truncated
// to Lng32.
//
// Returns -1 when the sample type is not CLUSTER, when no balance
// expression is available, or when there is more than one balance node
// (getNextBalance() != NULL).
Lng32 RelSample::getClusterSize() const
{
  if (sampleType() != CLUSTER)
    return -1;

  // Prefer the unbound parse tree; fall back to the first bound
  // balance expression when the tree is no longer attached.
  ItmBalance *balance = (ItmBalance *)balanceExprTree_;
  if (balance == NULL && !balanceExpr().isEmpty())
    {
      ValueId firstId;
      balanceExpr().getFirst(firstId);
      balance = (ItmBalance *)(firstId.getItemExpr());
    }

  if (balance == NULL || balance->getNextBalance() != NULL)
    return -1;

  const double clusterSize = balance->getClusterConstValue();
  return (Lng32)clusterSize;
}
// TRUE exactly when getSamplePercent() yields a usable value, i.e.
// anything other than its -1.0 "not applicable" sentinel.
//
// NOTE(review): getSamplePercent() accepts both RANDOM and CLUSTER sample
// types, so despite the name this also returns TRUE for a relative,
// single-node CLUSTER sample. Confirm that callers expect this.
NABoolean RelSample::isSimpleRandomRelative() const
{
  return (getSamplePercent() == -1.0) ? FALSE : TRUE;
}
// RelSample::topHash() --------------------------------------------------
// Compute a hash value for a chain of derived RelExpr nodes.
// Used by the Cascade engine as a quick way to determine if
// two nodes are identical.
// Can produce false positives (nodes appear to be identical),
// but should not produce false negatives (nodes are definitely different)
//
// Inputs: none (other than 'this')
//
// Outputs: A HashValue of this node and all nodes in the
// derivation chain below (towards the base class) this node.
//
HashValue RelSample::topHash()
{
  // Seed with the hash of the base-class part of the derivation chain.
  HashValue hash = RelExpr::topHash();

  // Fold in everything that characterizes this sample node.
  hash ^= sampleType();
  hash ^= balanceExpr();
  hash ^= sampledColumns();
  hash ^= requiredOrder();

  return hash;
}
// RelSample::duplicateMatch()
// A more thorough method to compare two RelExpr nodes.
// Used by the Cascades engine when the topHash() of two
// nodes returns the same hash values.
//
// Inputs: other - a reference to another node of the same type.
//
// Outputs: NABoolean - TRUE if this node is 'identical' to the
// 'other' node. FALSE otherwise.
//
// In order to match, this node must match all the way down the
// derivation chain to the RelExpr class.
//
NABoolean
RelSample::duplicateMatch(const RelExpr & other) const
{
  // The base-class portions must match before anything else is checked.
  if (!RelExpr::duplicateMatch(other))
    return FALSE;

  // 'other' is known to be a RelSample at this point.
  RelSample &that = (RelSample &) other;

  // Two sample nodes are identical when the sampling type, the sample
  // size (balance) expressions and the required order keys all agree.
  //
  // NOTE(review): topHash() also folds in sampledColumns(), which is not
  // compared here; confirm that this asymmetry is intended.
  if (sampleType() == that.sampleType() &&
      balanceExpr() == that.balanceExpr() &&
      requiredOrder() == that.requiredOrder())
    return TRUE;

  return FALSE;
}
// RelSample::copyTopNode ----------------------------------------------
// Copy a chain of derived nodes (Calls RelExpr::copyTopNode).
// Needs to copy all relevant fields.
// Used by the Cascades engine.
//
// Inputs: derivedNode - If Non-NULL this should point to a node
// which is derived from this node. If NULL, then this
// node is the top of the derivation chain and a node must
// be constructed.
//
// Outputs: RelExpr * - A Copy of this node.
//
// If 'derivedNode' is non-NULL, then this method is being called
// from a copyTopNode method on a class derived from this one. If it
// is NULL, then this is the top of the derivation chain and a RelSample
// node must be constructed.
//
// In either case, the relevant data members must be copied to 'derivedNode'
// and 'derivedNode' is passed to the copyTopNode method of the class
// below this one in the derivation chain (RelExpr::copyTopNode() in this
// case).
//
RelExpr *
RelSample::copyTopNode(RelExpr *derivedNode, CollHeap *outHeap)
{
  // Reuse the node built by a derived class when there is one;
  // otherwise this is the top of the derivation chain and the node is
  // created here.
  //
  // NOTE(review): the constructor call relies on the default arguments
  // for requiredOrder and oHeap, so 'outHeap' is used for the allocation
  // but is not forwarded to the constructor. requiredOrderTree_ is not
  // copied either. Confirm both are intended.
  RelSample *copy = (RelSample *) derivedNode;
  if (copy == NULL)
    copy = new (outHeap) RelSample(child(0),
                                   sampleType(),
                                   balanceExprTree()
                                   );

  // Copy the relevant scalar fields.
  copy->sampleType_ = sampleType();
  copy->sampleScanSucceeded_ = sampleScanSucceeded();

  // The unbound balance tree, when still attached, gets its own copy.
  if (balanceExprTree() != NULL)
    copy->balanceExprTree() = balanceExprTree()->copyTree(outHeap)->castToItemExpr();

  // Bound expressions are copied by value.
  copy->balanceExpr() = balanceExpr();
  copy->sampledColumns() = sampledColumns();
  copy->requiredOrder() = requiredOrder();

  // Let the base class copy the data members it owns.
  return RelExpr::copyTopNode(copy, outHeap);
}
// RelSample::addLocalExpr() -----------------------------------------------
// Insert into a list of expressions all the expressions of this node and
// all nodes below this node in the derivation chain. Insert into a list of
// names, all the names of the expressions of this node and all nodes below
// this node in the derivation chain. This method is used by the GUI tool
// and by the Explain Function to have a common method to get all the
// expressions associated with a node.
//
// Inputs/Outputs: xlist - a list of expressions.
// llist - a list of names of expressions.
//
// The xlist contains a list of all the expressions associated with this
// node. The llist contains the names of these expressions. (These lists
// must be kept in the same order).
// RelSample::addLocalExpr potentially adds the balance expression
// ("balance_expression").
//
// It then calls RelExpr::addLocalExpr() which will add any RelExpr
// expressions to the list.
//
void RelSample::addLocalExpr(LIST(ExprNode *) &xlist,
                             LIST(NAString) &llist) const
{
  // Sampled columns: only listed when there is at least one.
  if (sampledColumns().entries() > 0)
    {
      xlist.insert(sampledColumns().rebuildExprTree(ITM_ITEM_LIST));
      llist.insert("sampled_columns");
    }

  // Balance expression: the unbound tree wins over the bound ValueIds.
  if (balanceExprTree_)
    {
      xlist.insert(balanceExprTree_);
      llist.insert("balance_expression");
    }
  else if (balanceExpr().entries() > 0)
    {
      xlist.insert(balanceExpr().rebuildExprTree(ITM_ITEM_LIST));
      llist.insert("balance_expression");
    }

  // Required order: same precedence rule as for the balance expression.
  if (requiredOrderTree_)
    {
      xlist.insert(requiredOrderTree_);
      llist.insert("required_order");
    }
  else if (requiredOrder().entries() > 0)
    {
      xlist.insert(requiredOrder().rebuildExprTree(ITM_ITEM_LIST));
      llist.insert("required_order");
    }

  // Finally append whatever the base class contributes.
  RelExpr::addLocalExpr(xlist,llist);
}
// RelSample::getPotentialOutputValues() ---------------------------------
// Construct a Set of the potential outputs of this node.
//
// Inputs: none (other than 'this')
//
// Outputs: outputValues - a ValueIdSet representing the potential outputs
// of this node.
//
// The potential outputs for the RelSample node are the new columns
// generated by the RelSample node. The new columns generated by RelSample
// node are "Sampled" versions of all the potential outputs of its child.
//
void
RelSample::getPotentialOutputValues(ValueIdSet & outputValues) const
{
  // The sampled columns are added to whatever the caller already
  // collected in 'outputValues'.
  const ValueIdSet &sampled = sampledColumns();
  outputValues += sampled;
}
// RelSample::pushdownCoveredExpr() ------------------------------------
//
// In order to compute the Group Attributes for a relational operator
// an analysis of all the scalar expressions associated with it is
// performed. The purpose of this analysis is to identify the sources
// of the values that each expression requires. As a result of this
// analysis values are categorized as external dataflow inputs or
// those that can be produced completely by a certain child of the
// relational operator.
//
// This method is invoked on each relational operator. It causes
// a) the pushdown of predicates and
// b) the recomputation of the Group Attributes of each child.
// The recomputation is required either because the child is
// assigned new predicates or is expected to compute some of the
// expressions that are required by its parent.
//
// For the sample operator, only the balance expression contains references
// to any outputs produced by the child. These expressions refer to the unsampled
// columns and therefore can be pushed down. However, the predicatesOnParent if
// they exist refer to the sampled columns and therefore they will not be pushed
// down.
// ---------------------------------------------------------------------
void
RelSample::pushdownCoveredExpr(const ValueIdSet &outputExpr,
const ValueIdSet &newExternalInputs,
ValueIdSet &predicatesOnParent,
const ValueIdSet *setOfValuesReqdByParent,
Lng32 childIndex
)
{
ValueIdSet exprOnParent;
if (setOfValuesReqdByParent)
exprOnParent = *setOfValuesReqdByParent;
exprOnParent += outputExpr;
ValueId refVal;
ValueIdSet outputSet;
// Prune from the sampledColumns() ValueIdSet, those expressions
// that are not needed above (in setOfValuesReqdByParent) or by
// the selectionPred.
//
for(ValueId sampleCol = sampledColumns().init(); sampledColumns().next(sampleCol);
sampledColumns().advance(sampleCol)) {
if(!exprOnParent.referencesTheGivenValue(sampleCol, refVal) &&
!selectionPred().referencesTheGivenValue(sampleCol, refVal)) {
sampledColumns() -= sampleCol;
}
}
// Remove all expressions from exprOnParent. They
// can't be pushed down!
//
exprOnParent.clear();
// Add all the values required for the Sample expressions
// to the values required by the parent. These expression
// can't be pushed down either, but attempting to push them
// down causes the child node to provide the values needed.
//
outputSet += sampledColumns();
exprOnParent += balanceExpr();
exprOnParent.insertList(requiredOrder());
RelExpr::pushdownCoveredExpr(outputSet,
newExternalInputs,
predicatesOnParent,
&exprOnParent,
childIndex);
} // RelSample::pushdownCoveredExpr
// Create (or share) the optimization Context for the single child.
//
//   myContext  - the context this node is being optimized in
//   pws        - plan work space; receives the child context
//   childIndex - OUT: set to 0, the only child
//
// Returns NULL when a context has already been produced for every child,
// when the parent requires replication via broadcast, or when the
// combined requirements are not feasible.
Context* RelSample::createContextForAChild(Context* myContext,
                                           PlanWorkSpace* pws,
                                           Lng32& childIndex)
{
  // One Context per child; once they all exist, signal completion.
  if (pws->getCountOfChildContexts() == getArity())
    return NULL;

  childIndex = 0;
  Lng32 planNumber = 0;

  const ReqdPhysicalProperty* rppForMe = myContext->getReqdPhysicalProperty();
  PartitioningRequirement* partReqForMe =
    rppForMe->getPartitioningRequirement();

  // Only an exchange operator can satisfy a broadcast replication
  // partitioning requirement, so give up right away in that case.
  if (partReqForMe != NULL &&
      partReqForMe->isRequirementReplicateViaBroadcast())
    return NULL;

  RequirementGenerator rg(child(0),rppForMe);

  // Drop whatever ordering the parent asked for ...
  rg.removeSortKey();
  rg.removeArrangement();
  rg.removeSortOrderTypeReq();

  // ... and ask for the order this sample needs. A sort order type
  // requirement shouldn't/can't be added when executing in DP2.
  rg.addSortKey(requiredOrder(),
                rppForMe->executeInDP2() ? NO_SOT : ESP_SOT);

  // Anything that is not a simple relative sample can not be executed
  // in parallel.
  if (!isSimpleRandomRelative())
    rg.addNumOfPartitions(1);

  // All requirements have been added; give up if they can't be met.
  if (!rg.checkFeasibility())
    return NULL;

  // Cost limit to be applied to the child.
  CostLimit* costLimit = computeCostLimit(myContext, pws);

  // Look for an existing Context in the child's group with the same
  // required properties and reuse it; otherwise a new one is created.
  Context* childContext = shareContext(childIndex,
                                       rg.produceRequirement(),
                                       myContext->getInputPhysicalProperty(),
                                       costLimit,
                                       myContext,
                                       myContext->getInputLogProp());

  // Remember the child's Context in the PlanWorkSpace.
  pws->storeChildContext(childIndex, planNumber, childContext);

  return childContext;
} // RelSample::createContextForAChild()
// RelSample::removeBalanceExprTree() -------------------------------------
// Return the balanceExprTree_ ItemExpr tree and set it to NULL.
//
// Inputs: none (Other than 'this')
//
// Outputs: ItemExpr * - the value of balanceExprTree_
//
// Side Effects: Sets the value of balanceExprTree_ to NULL.
//
// Called by RelSample::bindNode(). The value of balanceExprTree_ is not
// needed after the binder.
//
ItemExpr *
RelSample::removeBalanceExprTree()
{
  // Hand the current tree to the caller and forget it locally.
  ItemExpr *const detachedTree = balanceExprTree();
  balanceExprTree_ = (ItemExpr *)NULL;
  return detachedTree;
}
// RelSample::transformNode() -------------------------------------------
// Unconditional query transformations such as the transformation of
// a subquery to a semijoin are implemented by the virtual function
// transformNode(). The aim of such transformations is to bring the
// query tree to a canonical form. transformNode() also ensures
// that the "required" (or characteristic) input values are "minimal"
// and the "required" (or characteristic) outputs values are
// "maximal" for each operator.
//
// transformNode() is an overloaded name, which is used for a set
// of methods that implement the transformation phase of query
// normalization.
//
// We use the term query tree for a tree of relational operators,
// each of which can contain none or more scalar expression trees.
// The transformations performed by transformNode() brings scalar
// expressions into a canonical form. The effect of most such
// transformations is local to the scalar expression tree.
// However, the transformation of a subquery requires a semijoin
// to be performed between the relational operator that contains
// the subquery and the query tree for the subquery. The effect
// of such a subquery transformation is therefore visible not
// only in the scalar expression tree but also in the relational
// expression tree.
//
// Parameters:
//
// NormWA & normWARef
// IN : a pointer to the normalizer work area
//
// ExprGroupId & locationOfPointerToMe
// IN : a reference to the location that contains a pointer to
// the RelExpr that is currently being processed.
//
void RelSample::transformNode(NormWA &normWARef,
                              ExprGroupId &locationOfPointerToMe)
{
  CMPASSERT( this == locationOfPointerToMe );

  // A node is transformed at most once.
  //
  if (nodeIsTransformed())
    return;
  markAsTransformed();

  // The sample node does not pull up predicates, so equality predicates
  // that hold below this node are not true above it. Transform the child
  // in a VEGRegion of its own.
  normWARef.allocateAndSetVEGRegion(IMPORT_AND_EXPORT,this);

  // Transform the child first.
  //
  child(0)->transformNode(normWARef, child(0));

  // Transform the balance expression. A TRUE result means a new tree has
  // become our child, and that tree has to be transformed as well.
  //
  const NABoolean balanceGaveNewChild =
    balanceExpr().transformNode(normWARef,
                                child(0),
                                getGroupAttr()->getCharacteristicInputs());
  if (balanceGaveNewChild)
    child(0)->transformNode(normWARef, child(0));

  // Same treatment for the required order list: if it still contained
  // unprocessed subqueries (is this possible?), normalize the new tree
  // that has become our child.
  //
  const NABoolean orderGaveNewChild =
    requiredOrder().transformNode(normWARef,
                                  child(0),
                                  getGroupAttr()->getCharacteristicInputs());
  if (orderGaveNewChild)
    child(0)->transformNode(normWARef, child(0));

  normWARef.restoreOriginalVEGRegion();

  // Recompute the required inputs of whoever my children are now
  // (see pullUpPreds()).
  //
  pullUpPreds();

  // Lastly, transform the selection predicates.
  //
  transformSelectPred(normWARef, locationOfPointerToMe);
} // RelSample::transformNode()
// RelSample::rewriteNode() ---------------------------------------------
// rewriteNode() is the virtual function that computes
// the transitive closure for "=" predicates and rewrites value
// expressions.
//
// Parameters:
//
// NormWA & normWARef
// IN : a pointer to the normalizer work area
//
// Rewrite phase for the sample node: rewrites the child, normalizes this
// node's own expressions and then the group attributes.
void RelSample::rewriteNode(NormWA & normWARef)
{
// locate the VEGRegion that was created for this node during transformation
normWARef.locateAndSetVEGRegion(this);
// The child, the balance expression and the required order are all
// processed while that VEGRegion is the current one.
child(0)->rewriteNode(normWARef);
balanceExpr().normalizeNode(normWARef);
requiredOrder().normalizeNode(normWARef);
normWARef.restoreOriginalVEGRegion();
// The selection predicates are normalized only after the original
// VEGRegion has been restored.
selectionPred().normalizeNode(normWARef);
// rewrite expression in the group attributes
getGroupAttr()->normalizeInputsAndOutputs(normWARef);
} // RelSample::rewriteNode()
// Normalize this node and its child. Always returns 'this'; the child
// pointer is replaced by whatever the child's normalizeNode() returns.
RelExpr * RelSample::normalizeNode(NormWA & normWARef)
{
// A node is normalized at most once.
if (nodeIsNormalized())
return this;
markAsNormalized();
// Push down using this node's characteristic outputs and inputs; the
// selection predicates are passed as the predicates on the parent.
pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
getGroupAttr()->getCharacteristicInputs(),
selectionPred());
// locate the VEGRegion that was created for this node during transformation
normWARef.locateAndSetVEGRegion(this);
child(0) = child(0)->normalizeNode(normWARef);
normWARef.restoreOriginalVEGRegion();
fixEssentialCharacteristicOutputs();
return this;
}
// RelSample::pullUpPreds() --------------------------------------------
// is redefined to disallow the pullup of predicates
// from the operator's child. The outputs of the sample operator
// are "sampled" versions of the outputs of its child.
//
// Deliberately pulls up nothing; the only action is to have the child
// recompute its outer references.
void RelSample::pullUpPreds()
{
// ---------------------------------------------------------------------
// Simply don't pull up child's selection predicates. Still need to tell
// child to recompute its outer references due to the warning below.
// ---------------------------------------------------------------------
child(0)->recomputeOuterReferences();
// ---------------------------------------------------------------------
// WARNING: One rule that this procedure must follow is
// that recomputeOuterReferences() must be called on the children even
// if no predicates are pulled up from them. This is to correct
// the outer references that are added to a right child of a
// semi or outer join when processing subqueries in the ON clause.
// ---------------------------------------------------------------------
}
// RelSample::recomputeOuterReferences() --------------------------------
// This method is used by the normalizer for recomputing the
// outer references (external dataflow input values) that are
// still referenced by each operator in the subquery tree
// after the predicate pull up is complete.
//
// Side Effects: sets the characteristicInputs of the groupAttr.
//
void RelSample::recomputeOuterReferences()
{
// This is virtual method on RelExpr.
// When this is called it is assumed that the children have already
// been transformed.
// The required inputs of the child are therefore already minimal
// and sufficient.
// It is also assumed that the RelExpr itself has been bound.
// That implies that the group attributes have already been allocated
// and the required inputs is a sufficient (but not necessarilly minimum)
// set of external values needed to evaluate all expressions in this subtree.
//
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
//
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
// The set of valueIds need by this node.
//
ValueIdSet allMyExpr(getSelectionPred());
allMyExpr += balanceExpr();
allMyExpr.insertList(requiredOrder());
// Remove from outerRefs those valueIds that are not needed
// by all my expressions
//
allMyExpr.weedOutUnreferenced(outerRefs);
// Add to outerRefs those that my children need.
//
outerRefs += child(0).getPtr()->getGroupAttr()->getCharacteristicInputs();
// set my Character Inputs to this new minimal set.
//
getGroupAttr()->setCharacteristicInputs(outerRefs);
} // RelSample::recomputeOuterReferences()
// Estimate the number of rows this sample produces for a child that
// produces 'childCardinality' rows. The estimate is delegated to the
// (single) bound balance expression.
CostScalar RelSample::computeResultSize(const CostScalar &childCardinality)
{
  // Exactly one bound balance expression is expected here.
  CMPASSERT(balanceExpr().entries() == 1);

  ValueId rootId;
  balanceExpr().getFirst(rootId);

  ItmBalance *balanceRoot = (ItmBalance *)rootId.getItemExpr();
  return balanceRoot->computeResultSize(childCardinality);
}
// RelSample::synthEstLogProp() ------------------------------------------
// synthesize estimated logical properties given a specific set of
// input log. properties.
//
// Parameters:
//
// EstLogPropSharedPtr inputEstLogProp
// IN : A set of input logical properties used to estimate the logical
// properties of this node.
//
void RelSample::synthEstLogProp(const EstLogPropSharedPtr& inputEstLogProp)
{
if (getGroupAttr()->isPropSynthesized(inputEstLogProp) == TRUE)
return;
// Get the estimated logical properties of the child. To be used
// to estimate the logical properties of this node.
//
EstLogPropSharedPtr childEstProp = child(0).outputLogProp(inputEstLogProp);
const ColStatDescList &childColStats = childEstProp->getColStats();
CostScalar rowCount =
computeResultSize(childEstProp->getResultCardinality());
for(CollIndex i = 0; i < childColStats.entries(); i++) {
ColStatDescSharedPtr columnStatDesc = childColStats[i];
CostScalar oldCount = columnStatDesc->getColStats()->getRowcount();
if (oldCount != rowCount)
columnStatDesc->synchronizeStats(oldCount, rowCount);
}
EstLogPropSharedPtr myEstProps =
synthEstLogPropForUnaryLeafOp(inputEstLogProp,
childColStats,
rowCount);
// Set the logical properties of this node.
//
getGroupAttr()->addInputOutputLogProp(inputEstLogProp, myEstProps);
} // RelSample::synthEstLogProp
// RelSample::synthLogProp ----------------------------------------------
// synthesize logical properties
//
void
RelSample::synthLogProp(NormWA * normWAPtr)
{
  // Properties that were synthesized before are left alone.
  if (getGroupAttr()->existsLogExprForSynthesis())
    return;

  RelExpr::synthLogProp(normWAPtr);

  // Collect every child constraint that is not one of the two
  // ref-opt constraint kinds.
  ValueIdSet nonRIConstraints;
  ValueId x = child(0).getGroupAttr()->getConstraints().init();
  while (child(0).getGroupAttr()->getConstraints().next(x))
    {
      const NABoolean isRefOptConstraint =
        (x.getItemExpr()->getOperatorType() == ITM_COMP_REF_OPT_CONSTRAINT) ||
        (x.getItemExpr()->getOperatorType() == ITM_REF_OPT_CONSTRAINT);

      if (!isRefOptConstraint)
        nonRIConstraints += x;

      child(0).getGroupAttr()->getConstraints().advance(x);
    }
  getGroupAttr()->addConstraints(nonRIConstraints);

  // The ref-opt constraints are handed over separately; the group
  // attributes pick the suitable ones.
  getGroupAttr()->addSuitableRefOptConstraints
    (child(0).getGroupAttr()->getConstraints());
} // RelSample::synthLogProp()
// RelSample::bindNode - Bind the RelSample node.
// This node is generated by the parser when it encounters a SAMPLE
// clause. This node has two item expressions:
//
// sizeExprTree(): This expression contains either a simple size
// expression containing a size, type and normal/oversampling field
// or a tree of IfThenElse nodes each with a predicate on a column
// and a simple size expression followed by an optional else part.
//
// skipPeriodTree(): This is a simple size expression with no reference
// to any inputs or outputs of this node or its child. The reason to make
// this into an expression is only to reuse the SampleSize ItemExpr.
// However, this "expression" is really not an expression and hence need not
// treated as one (e.g., there is no need to bind, normalize, etc. Similarly
// no need to check for outer references, etc).
//
// Bind this sample node.
//
// Returns 'this' when the node was already bound or when an error was
// recorded in bindWA; returns the child Scan when the whole sample could
// be pushed down into an HBase scan (the sample node is then dropped from
// the tree); otherwise returns the result of bindSelf().
RelExpr *RelSample::bindNode(BindWA *bindWA)
{
  // If this node has already been bound, we are done.
  //
  if (nodeIsBound())
    return this;

  // Bind the child nodes.
  //
  bindChildren(bindWA);
  if (bindWA->errStatus())
    return this;

  // If this is a random sample on an HBase table, push the sampling down into
  // the Scan node and remove the Sample node from the tree. For HBase, we
  // perform sampling via a row filter on the HBase side.
  //
  // Avoid pushdown for oversampling (sampling rate > 100%); the HBase filter
  // we use can not return >1 copy of a row.
  //
  // For very low sampling rates, a significant amount of time could be spent in
  // HBase before returning anything to Trafodion, with the risk of getting a
  // scanner timeout exception. This is addressed on the HBase side by reducing
  // the scan's cache size (the term they use for a set of rows combined into a
  // single return). In extreme cases, the expected interval between returns may
  // still be too great even when the cache size is set to the minimum prescribed
  // by the HBASE_NUM_CACHE_ROWS_MIN cqd. For these cases, we divide the sampling
  // between HBase and Trafodion, doing as much as possible in HBase without risking
  // timeout.
  //
  // NOTE(review): isSimpleRandomRelative() is also TRUE for a relative
  // CLUSTER sample (see getSamplePercent()); confirm that pushing such a
  // sample down as a row filter is intended.
  //
  Float32 trafSampleRate = getSamplePercent();
  RelExpr* myChild = child(0);
  if (myChild->getOperatorType() == REL_SCAN &&
      (static_cast<Scan*>(myChild))->isHbaseTable() &&
      isSimpleRandomRelative() &&
      trafSampleRate <= 1.0f)
    {
      ULng32 returnInterval =
        ActiveSchemaDB()->getDefaults().getAsULong(USTAT_HBASE_SAMPLE_RETURN_INTERVAL);
      Lng32 cacheMin = CmpCommon::getDefaultNumeric(HBASE_NUM_CACHE_ROWS_MIN);
      if (trafSampleRate < cacheMin / (Float32)returnInterval)
        {
          // Split the sampling: HBase samples at the lowest rate it can
          // handle, and this node samples the remainder.
          Float32 hbaseSampleRate = cacheMin / (Float32)returnInterval;
          trafSampleRate /= hbaseSampleRate;

          // The parser function literalOfNumericWithScale() is used to get the
          // correct form of the ConstValue (a fixed numeric) required for the
          // ITM_BALANCE sample percentage operand. That function expects a heap-
          // allocated NAString (which it deletes) containing the percentage in
          // text form.
          static const int BUF_SIZE = 20;
          char buf[BUF_SIZE];
          snprintf(buf, BUF_SIZE, "%f", trafSampleRate * 100); // Express as a percentage
          NAString* percentStrPtr = new(STMTHEAP) NAString(buf, STMTHEAP);
          ExprNode* oldConst = balanceExprTree_->getChild(1);
          balanceExprTree_->setChild(1, literalOfNumericWithScale(percentStrPtr, '+'));
          delete oldConst;
          (static_cast<Scan*>(myChild))->samplePercent(hbaseSampleRate);
        }
      else
        {
          // The whole sample can be done by HBase; this node is not needed.
          (static_cast<Scan*>(myChild))->samplePercent(trafSampleRate);
          return myChild;
        }
    }

  // Convert the required-order parse tree (if any) into requiredOrder().
  //
  ItemExpr *requiredOrderTree = removeRequiredOrderTree();
  if(requiredOrderTree) {
    bindWA->getCurrentScope()->context()->inOrderBy() = TRUE;
    requiredOrderTree->convertToValueIdList(requiredOrder(),
                                            bindWA,
                                            ITM_ITEM_LIST);
    bindWA->getCurrentScope()->context()->inOrderBy() = FALSE;
    if(bindWA->errStatus())
      return this;
  }

  // Bind the balanceExprTree. This expression may contain a tree
  // of Balance expressions.
  //
  // The constructor accepts a NULL balance expression, and the code below
  // already copes with a NULL bound expression, so only call bindNode()
  // on a tree that actually exists.
  //
  ItemExpr *unboundBalanceExpr = removeBalanceExprTree();
  ItemExpr *boundBalanceExpr = NULL;
  if (unboundBalanceExpr != NULL)
    boundBalanceExpr = unboundBalanceExpr->bindNode(bindWA);
  if (bindWA->errStatus())
    return this;

  if (boundBalanceExpr != NULL) {
    balanceExpr().insert(boundBalanceExpr->getValueId());
    if(((ItmBalance *)boundBalanceExpr)->checkErrors()) {
      bindWA->setErrStatus();
      return this;
    }
  }

  // Generate the selection predicate from the balance expression only
  // if there is a balance expression tree and there is no else clause.
  //
  NABoolean hasReturnTrue = FALSE;
  ItmBalance * nextBalanceNode = (ItmBalance *)boundBalanceExpr;
  while ((nextBalanceNode != NULL) AND (hasReturnTrue == FALSE))
    {
      if (nextBalanceNode->getPredicate()->getOperatorType() == ITM_RETURN_TRUE)
        hasReturnTrue = TRUE;
      nextBalanceNode = (ItmBalance *)nextBalanceNode->getNextBalance();
    }

  if (hasReturnTrue == FALSE)
    {
      // OR together the predicates of all balance nodes in the chain.
      nextBalanceNode = (ItmBalance *)boundBalanceExpr;
      ItemExpr *pred;
      ItemExpr *orexpr = NULL;
      while (nextBalanceNode != NULL)
        {
          pred = nextBalanceNode->getPredicate();
          if (orexpr != NULL)
            orexpr = new (CmpCommon::statementHeap()) BiLogic(ITM_OR, orexpr, pred);
          else
            orexpr = pred;
          nextBalanceNode = (ItmBalance *)nextBalanceNode->getNextBalance();
        }

      // Now synthesize type
      if (orexpr)
        orexpr->synthTypeAndValueId(TRUE); // redrive Type Synthesis
      addSelPredTree(orexpr);
    }

  // Construct the RETDesc for this node.
  //
  RETDesc *resultTable = new(bindWA->wHeap()) RETDesc(bindWA);

  // Add the columns from the child to the RETDesc. Every child column is
  // wrapped in a NotCovered node, and the wrapper becomes a sampled column.
  //
  const RETDesc &childTable = *child(0)->getRETDesc();
  const ColumnDescList *sysColList = childTable.getSystemColumnList();
  CollIndex i = 0;

  // System columns first ...
  for(i = 0; i < sysColList->entries(); i++)
    {
      ValueId columnValueId = sysColList->at(i)->getValueId();
      ItemExpr *newColumn = new (bindWA->wHeap())
        NotCovered (columnValueId.getItemExpr());
      newColumn->synthTypeAndValueId();
      resultTable->addColumn(bindWA,
                             sysColList->at(i)->getColRefNameObj(),
                             newColumn->getValueId(),
                             SYSTEM_COLUMN,
                             sysColList->at(i)->getHeading());
      sampledColumns() += newColumn->getValueId();
    }

  // ... then the user columns.
  for(i = 0; i < childTable.getDegree(); i++)
    {
      ValueId columnValueId = childTable.getValueId(i);
      ItemExpr *newColumn = new (bindWA->wHeap())
        NotCovered (columnValueId.getItemExpr());
      newColumn->synthTypeAndValueId();
      resultTable->addColumn(bindWA,
                             childTable.getColRefNameObj(i),
                             newColumn->getValueId(),
                             USER_COLUMN,
                             childTable.getHeading(i));
      sampledColumns() += newColumn->getValueId();
    }

  // Set the return descriptor
  //
  setRETDesc(resultTable);
  bindWA->getCurrentScope()->setRETDesc(resultTable);

  //
  // Bind the base class.
  //
  return bindSelf(bindWA);
} // RelSample::bindNode()
// -----------------------------------------------------------------------
// RelSample::semanticQueryOptimizeNode()
// This instance of the SQO virtual method is the same as the base class
// implementation except that it also keeps track of which
// VEGRegion we are currently in.
// -----------------------------------------------------------------------
// Always returns 'this'; the child pointer is replaced by whatever the
// child's semanticQueryOptimizeNode() returns.
RelExpr * RelSample::semanticQueryOptimizeNode(NormWA & normWARef)
{
// A node goes through this phase at most once.
if (nodeIsSemanticQueryOptimized())
return this;
markAsSemanticQueryOptimized() ;
// The child is processed with this node's VEGRegion as the current one;
// the original region is restored afterwards.
normWARef.locateAndSetVEGRegion(this);
// ---------------------------------------------------------------------
// UnNest the child.
// ---------------------------------------------------------------------
child(0) = child(0)->semanticQueryOptimizeNode(normWARef);
normWARef.restoreOriginalVEGRegion();
return this;
} // RelSample::semanticQueryOptimizeNode()
/////////////////////////////////////////////////////////////////////////////////////
//
// Methods of the PhysSample
//
/////////////////////////////////////////////////////////////////////////////////////
// Destructor; the body is empty.
PhysSample::~PhysSample()
{
}
// Copy this physical sample node. A PhysSample is created on 'oHeap'
// unless a derived class already supplied one; the data members are then
// copied by RelSample::copyTopNode().
RelExpr *
PhysSample::copyTopNode(RelExpr *derivedNode, CollHeap *oHeap)
{
  PhysSample *copy = (derivedNode == NULL)
                       ? new (oHeap) PhysSample()
                       : (PhysSample *)derivedNode;

  return RelSample::copyTopNode(copy, oHeap);
}
// PhysSample::costMethod()
// Obtain a pointer to a CostMethod object providing access
// to the cost estimation functions for nodes of this type.
CostMethod*
PhysSample::costMethod() const
{
  // Created on first use and handed out again on every later call.
  static THREAD_P CostMethodSample *sampleCostMethod = NULL;

  if (sampleCostMethod != NULL)
    return sampleCostMethod;

  sampleCostMethod = new (GetCliGlobals()->exCollHeap()) CostMethodSample();
  return sampleCostMethod;
} // PhysSample::costMethod()
// Map a sort key through a ValueIdMap that pairs each sampled column with
// the value held by its NotCovered wrapper (child(0) of the wrapper).
// Returns the mapped list.
ValueIdList
RelSample::mapSortKey(const ValueIdList &sortKey) const
{
  // Build the map: sampled column <-> value it wraps.
  ValueIdMap sampledToWrapped;
  ValueId sampleCol = sampledColumns().init();
  while (sampledColumns().next(sampleCol))
    {
      // Every sampled column is expected to be a NotCovered node.
      CMPASSERT(sampleCol.getItemExpr()->getOperatorType() == ITM_NOTCOVERED);

      sampledToWrapped.addMapEntry(sampleCol,
                                   sampleCol.getItemExpr()->child(0)->getValueId());

      sampledColumns().advance(sampleCol);
    }

  ValueIdList mappedKey;
  sampledToWrapped.mapValueIdListUp(mappedKey, sortKey);
  return mappedKey;
}
// Synthesize the physical properties of this node from those of the
// child's solution. 'pn' and 'pws' are not used here.
PhysicalProperty *
PhysSample::synthPhysicalProperty(const Context *context,
                                  const Lng32 pn,
                                  PlanWorkSpace *pws)
{
  const PhysicalProperty * const childProps =
    context->getPhysicalPropertyOfSolutionForChild(0);

  // For now the child's properties are simply propagated; only the sort
  // key is passed through mapSortKey().
  return new(CmpCommon::statementHeap())
    PhysicalProperty(*childProps,
                     mapSortKey(childProps->getSortKey()),
                     childProps->getSortOrderType(),
                     childProps->getDp2SortOrderPartFunc());
} // PhysSample::synthPhysicalProperty()