blob: 843eaccce7f7e0ea88c5cb5e21ab323e98cf7898 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: RelPackedRows.cpp
* Description: All the methods of UnPackRows() and PhysUnPackRows().
*
* Created: 6/27/97
* Language: C++
*
*
******************************************************************************
*/
#include "AllItemExpr.h"
#include "AllRelExpr.h"
#include "RelPackedRows.h"
#include "SchemaDB.h"
#include "GroupAttr.h"
#include "BindWA.h"
#include "NormWA.h"
#include "Cost.h"
#include "CostMethod.h"
#include "opt.h"
#include "Globals.h"
// -----------------------------------------------------------------------
// This file contains all the methods for the class UnPackRows.
// This is a departure from the way other RelExpr's are organized.
//
// UnPackRows constructor
// Inputs:
//
// maxPackingFactor: The packing factor of the packed table. This value
// is stored so that the number of rows generated by this node
// can be estimated.
//
// unPackExprs: This expression must contain a list of expressions to
// unpack the SYSKEY and all the user columns of the packed table.
// The SYSKEY is unpacked by 'calculating' the SYSKEY value based on
// an initial value of SYSKEY for the packed row and the current value
// of the index into the packed row. The user columns are unpacked using
// the UnPackCol expression based on the value of the index. The
// index is provided at runtime thru a hostVar. The valueId of this
// HostVar must be captured so that it can be used to map a local
// variable into the workAtp of the work procedure.
//
// packingFactor: This expression must contain a list of expressions to
// extract the NUMROWS field from each packed column. During normalization,
// this list will be reduced to only one. (The value of NUMROWS is
// the same in each packed column). This order of this list must
// correspond to the order of the unPackExprs list.
//
// child: The child node of this node. presumably a Scan node.
// Constructor for the packed-table form of UnPackRows.
// The unbound expression trees, the table descriptor and the ValueId of the
// index host variable are simply stored; the rowwise-rowset members are
// initialized to FALSE/NULL. The node is marked non-cacheable.
UnPackRows::UnPackRows(Lng32 maxPackingFactor,
ItemExpr *unPackExprs,
ItemExpr *packingFactor,
TableDesc *unPackedTable,
RelExpr *child,
ValueId indexHostVarValueId,
CollHeap *oHeap)
: RelExpr(REL_UNPACKROWS, child, NULL, oHeap),
maxPackingFactor_(maxPackingFactor),
unPackExprTree_(unPackExprs),
packingFactorTree_(packingFactor),
unPackedTable_(unPackedTable),
indexValue_(indexHostVarValueId),
// No SYSKEY value id is known at construction time.
sysKeyId_(NULL_VALUE_ID),
// The original/rewritten predicate lists are allocated lazily by
// constructNewSyskeyPreds().
originalPreds_(NULL),
rewrittenPreds_(NULL),
// This form is not a rowwise rowset, so the rwrs expressions stay NULL.
rowwiseRowset_(FALSE),
rwrsInputSizeExpr_(NULL),
rwrsMaxInputRowlenExpr_(NULL),
rwrsBufferAddrExpr_(NULL)
{
setNonCacheable();
}
// Constructor for the rowwise-rowset form of UnPackRows.
// Only the three rwrs expressions (input size, maximum input row length and
// buffer address) are stored; the packed-table members (expression trees,
// table descriptor, index value) are initialized to NULL / NULL_VALUE_ID.
// The node is marked non-cacheable.
UnPackRows::UnPackRows(Lng32 maxPackingFactor,
ItemExpr *rwrsInputSizeExpr,
ItemExpr *rwrsMaxInputRowlenExpr,
ItemExpr *rwrsBufferAddrExpr,
RelExpr *child,
CollHeap *oHeap)
: RelExpr(REL_UNPACKROWS, child, NULL, oHeap),
maxPackingFactor_(maxPackingFactor),
// No packed table is involved in this form.
unPackExprTree_(NULL),
packingFactorTree_(NULL),
unPackedTable_(NULL),
indexValue_(NULL_VALUE_ID),
sysKeyId_(NULL_VALUE_ID),
originalPreds_(NULL),
rewrittenPreds_(NULL),
// Flag that distinguishes this form from the packed-table form.
rowwiseRowset_(TRUE),
rwrsInputSizeExpr_(rwrsInputSizeExpr),
rwrsMaxInputRowlenExpr_(rwrsMaxInputRowlenExpr),
rwrsBufferAddrExpr_(rwrsBufferAddrExpr)
{
setNonCacheable();
}
// UnPackRows::~UnPackRows() -----------------------------------------------
// The destructor.
// The body is empty: nothing referenced by this node is released here.
//
UnPackRows::~UnPackRows()
{
}
// UnPackRows::topHash() --------------------------------------------------
// Compute a hash value for a chain of derived RelExpr nodes.
// Used by the Cascade engine as a quick way to determine if
// two nodes are identical.
// Can produce false positives (nodes appear to be identical),
// but should not produce false negatives (nodes are definitely different)
//
// Inputs: none (other than 'this')
//
// Outputs: A HashValue of this node and all nodes in the
// derivation chain below (towards the base class) this node.
//
HashValue UnPackRows::topHash()
{
  // Start from the hash contributed by the base-class part of the node ...
  //
  HashValue hash = RelExpr::topHash();

  // ... then fold in the two expression collections owned by this class.
  //
  hash ^= unPackExpr();
  hash ^= packingFactor();

  return hash;
}
// UnPackRows::duplicateMatch()
// A more thorough method to compare two RelExpr nodes.
// Used by the Cascades engine when the topHash() of two
// nodes returns the same hash values.
//
// Inputs: other - a reference to another node of the same type.
//
// Outputs: NABoolean - TRUE if this node is 'identical' to the
// 'other' node. FALSE otherwise.
//
// In order to match, this node must match all the way down the
// derivation chain to the RelExpr class.
//
// For the UnPackRows node, the relevant data members that are compared
// are unPackedTable_, unPackExpr_, packingFactor_ and rowwiseRowset_.
//
NABoolean
UnPackRows::duplicateMatch(const RelExpr & other) const
{
  // The base-class portions of the two nodes must agree before any of
  // the UnPackRows members are looked at.
  //
  if (!RelExpr::duplicateMatch(other))
    return FALSE;

  // View 'other' as an UnPackRows node (it is expected to be one).
  //
  UnPackRows &rhs = (UnPackRows &) other;

  // Any difference in the unpacked table, the unpack expressions, the
  // packing factor or the rowwise-rowset flag means the nodes differ.
  // The comparisons are made in that order and stop at the first mismatch.
  //
  if (!(unPackedTable() == rhs.unPackedTable()) ||
      !(unPackExpr() == rhs.unPackExpr()) ||
      !(packingFactor() == rhs.packingFactor()) ||
      !(rowwiseRowset() == rhs.rowwiseRowset()))
    return FALSE;

  return TRUE;
}
// UnPackRows::copyTopNode ----------------------------------------------
// Copy a chain of derived nodes (Calls RelExpr::copyTopNode).
// Needs to copy all relevant fields.
// Used by the Cascades engine.
//
// Inputs: derivedNode - If Non-NULL this should point to a node
// which is derived from this node. If NULL, then this
// node is the top of the derivation chain and a node must
// be constructed.
//
// Outputs: RelExpr * - A Copy of this node.
//
// If the 'derivedNode is non-NULL, then this method is being called
// from a copyTopNode method on a class derived from this one. If it
// is NULL, then this is the top of the derivation chain and an UnPackRows
// node must be constructed.
//
// In either case, the relevant data members must be copied to 'derivedNode'
// and 'derivedNode' is passed to the copyTopNode method of the class
// below this one in the derivation chain (RelExpr::copyTopNode() in this
// case).
//
RelExpr *
UnPackRows::copyTopNode(RelExpr *derivedNode, CollHeap *outHeap)
{
  UnPackRows *result;

  if (derivedNode == NULL)
    {
      // This is the top of the derivation chain: create an empty
      // UnPackRows node of the same form as this one. The call with
      // three ItemExpr pointers selects the rowwise-rowset constructor.
      //
      if (rowwiseRowset())
        result = new (outHeap) UnPackRows(0,
                                          (ItemExpr*)NULL,
                                          (ItemExpr*)NULL,
                                          (ItemExpr*)NULL, NULL);
      else
        result = new (outHeap) UnPackRows();
    }
  else
    // A node has already been constructed as a derived class.
    //
    result = (UnPackRows *) derivedNode;

  // Copy the relevant fields.
  //
  result->unPackedTable() = unPackedTable();
  result->unPackExpr() = unPackExpr();
  result->packingFactor() = packingFactor();
  result->maxPackingFactor_ = getMaxPackingFactor();
  result->indexValue() = indexValue();
  result->sysKeyId() = sysKeyId();
  result->originalPreds() = originalPreds();
  result->rewrittenPreds() = rewrittenPreds();
  result->setRowwiseRowset(rowwiseRowset());

  // Deep-copy the unbound expression trees, each one only if it is present.
  // These are not available after bindNode().
  //
  if (unPackExprTree() != NULL)
    result->unPackExprTree() =
      unPackExprTree()->copyTree(outHeap)->castToItemExpr();

  if (packingFactorTree() != NULL)
    result->packingFactorTree() =
      packingFactorTree()->copyTree(outHeap)->castToItemExpr();

  if (rwrsInputSizeExpr_ != NULL)
    result->rwrsInputSizeExpr_ =
      rwrsInputSizeExpr_->copyTree(outHeap)->castToItemExpr();

  if (rwrsMaxInputRowlenExpr_ != NULL)
    result->rwrsMaxInputRowlenExpr_ =
      rwrsMaxInputRowlenExpr_->copyTree(outHeap)->castToItemExpr();

  // Guard on the expression that is actually dereferenced. Testing
  // rwrsInputSizeExpr_ here would dereference a NULL rwrsBufferAddrExpr_
  // when only the size expression is set, and would skip the copy when
  // only the buffer-address expression is set.
  //
  if (rwrsBufferAddrExpr_ != NULL)
    result->rwrsBufferAddrExpr_ =
      rwrsBufferAddrExpr_->copyTree(outHeap)->castToItemExpr();

  result->rwrsOutputVids_ = rwrsOutputVids_;

  // Copy any data members from the classes lower in the derivation chain.
  //
  return RelExpr::copyTopNode(result, outHeap);
}
// UnPackRows::addLocalExpr() -----------------------------------------------
// Insert into a list of expressions all the expressions of this node and
// all nodes below this node in the derivation chain. Insert into a list of
// names, all the names of the expressions of this node and all nodes below
// this node in the derivation chain. This method is used by the GUI tool
// and by the Explain Function to have a common method to get all the
// expressions associated with a node.
//
// Inputs/Outputs: xlist - a list of expressions.
// llist - a list of names of expressions.
//
// The xlist contains a list of all the expressions associated with this
// node. The llist contains the names of these expressions. (These lists
// must be kept in the same order).
// UnPackRows::addLocalExpr potentially adds the unPackExpr_ expression
// ("unpack_expression"), the packingFactor_ expression ("packing_factor")
// and the indexValue_ expression ("index_value").
//
// It then calls RelExpr::addLocalExpr() which will add any RelExpr
// expressions to the list.
//
void UnPackRows::addLocalExpr(LIST(ExprNode *) &xlist,
                              LIST(NAString) &llist) const
{
  // Unpack expressions: while the bound collection is still empty the
  // unbound tree is reported, otherwise a tree is rebuilt from the
  // bound collection.
  //
  if (unPackExprTree() || unPackExpr().entries() > 0)
    {
      ExprNode *unpackNode;
      if (unPackExpr().isEmpty())
        unpackNode = unPackExprTree_;
      else
        unpackNode = unPackExpr().rebuildExprTree(ITM_ITEM_LIST);

      xlist.insert(unpackNode);
      llist.insert("unpack_expression");
    }

  // Packing factor: same choice between the unbound tree and the
  // bound collection.
  //
  if (packingFactorTree() || packingFactor().entries() > 0)
    {
      ExprNode *factorNode;
      if (packingFactor().isEmpty())
        factorNode = packingFactorTree_;
      else
        factorNode = packingFactor().rebuildExprTree(ITM_ITEM_LIST);

      xlist.insert(factorNode);
      llist.insert("packing_factor");
    }

  // The index value is reported only when one has been set.
  //
  if (indexValue_ != NULL_VALUE_ID)
    {
      xlist.insert(indexValue_.getItemExpr());
      llist.insert("index_value");
    }

  // Let the base class append its own expressions.
  //
  RelExpr::addLocalExpr(xlist, llist);
}
// UnPackRows::getPotentialOutputValues() ---------------------------------
// Construct a Set of the potential outputs of this node.
//
// Inputs: none (other than 'this')
//
// Outputs: outputValues - a ValueIdSet representing the potential outputs
// of this node. Any previous contents are discarded.
//
// The potential outputs for the UnPackRows node are the new columns
// generated by the UnPackRows node, i.e. the entries of unPackExpr().
// According to the constructor notes these unpack the SYSKEY column and
// the value columns.
//
void
UnPackRows::getPotentialOutputValues(ValueIdSet & outputValues) const
{
// Make sure the ValueIdSet is empty.
//
outputValues.clear();
// Add the values generated by the UnPackRows node.
//
outputValues.insertList(unPackExpr());
} // UnPackRows::getPotentialOutputValues()
// UnPackRows::constructNewSyskeyPreds() ----------------------------------
// For every predicate in 'predicates' of the form (SYSKEY op VALUE), where
// the left child is sysKeyId() and op is one of =, <, <=, >, >=, build an
// additional predicate on the first child of the SYSKEY expression and add
// it to 'predicates'. The original predicates are kept.
//
// Inputs/Outputs: predicates - the set that is scanned; the derived
// predicates are added to it at the end.
//
// Side Effects: originalPreds() / rewrittenPreds() are allocated on first
// use and record each (original, rewritten) pair at the same position, so
// that a predicate seen again reuses its earlier rewrite.
//
void
UnPackRows::constructNewSyskeyPreds(ValueIdSet &predicates)
{
if(sysKeyId() == NULL_VALUE_ID) {
// Not Possible...
//
return;
}
// Derived predicates are collected here and merged in after the scan,
// so 'predicates' is not modified while it is being iterated.
ValueIdSet newPredicates;
for(ValueId pred = predicates.init(); predicates.next(pred);
predicates.advance(pred)) {
// First see if this pred has already been rewritten...
//
CollIndex predIndex =
originalPreds() ? originalPreds()->index(pred) : NULL_COLL_INDEX;
if (predIndex != NULL_COLL_INDEX) {
// Reuse the rewrite stored at the same position.
newPredicates += rewrittenPreds()->at(predIndex);
} else {
// Try to match predicates of the form (SYSKEY OP VALUE)
OperatorTypeEnum oper = pred.getItemExpr()->getOperatorType();
if(oper == ITM_EQUAL ||
oper == ITM_LESS ||
oper == ITM_LESS_EQ ||
oper == ITM_GREATER ||
oper == ITM_GREATER_EQ) {
// Only predicates with the SYSKEY on the left side are matched.
if(pred.getItemExpr()->child(0) == sysKeyId()) {
// A match
// The new predicate is expressed on the first child of the SYSKEY
// expression (presumably the SYSKEY of the packed row -- confirm
// against the code that builds the unpack expressions).
ItemExpr *packedSyskey =
pred.getItemExpr()->child(0)->castToItemExpr()->child(0);
ItemExpr *oldValue = pred.getItemExpr()->child(1);
// Mask constant 65535 (0xFFFF), cast to the type of packedSyskey.
ItemExpr *maskConst =
new (CmpCommon::statementHeap()) SystemLiteral(65535);
const NAType *desiredType = &packedSyskey->getValueId().getType();
ItemExpr *maskValue =
new (CmpCommon::statementHeap())
Cast(maskConst, desiredType);
// The comparison value is cast to the same type as well.
oldValue =
new (CmpCommon::statementHeap())
Cast(oldValue, desiredType);
ItemExpr *newValue = NULL;
// Per operator: choose whether the mask is applied with
// ITM_MASK_CLEAR or ITM_MASK_SET, and adjust the comparison
// operator where needed (<= becomes <, > becomes >=).
switch(oper) {
case ITM_EQUAL:
newValue = new (CmpCommon::statementHeap())
Mask(ITM_MASK_CLEAR,oldValue, maskValue);
break;
case ITM_LESS:
newValue = new (CmpCommon::statementHeap())
Mask(ITM_MASK_SET, oldValue, maskValue);
break;
case ITM_LESS_EQ:
newValue = new (CmpCommon::statementHeap())
Mask(ITM_MASK_SET, oldValue, maskValue);
oper = ITM_LESS;
break;
case ITM_GREATER:
newValue = new (CmpCommon::statementHeap())
Mask(ITM_MASK_CLEAR, oldValue, maskValue);
oper = ITM_GREATER_EQ;
break;
case ITM_GREATER_EQ:
newValue = new (CmpCommon::statementHeap())
Mask(ITM_MASK_CLEAR, oldValue, maskValue);
break;
}
ItemExpr *newPred = new (CmpCommon::statementHeap())
BiRelat(oper, packedSyskey, newValue);
newPred->synthTypeAndValueId();
newPredicates += newPred->getValueId();
// Allocate the two parallel lists on first use.
if(!originalPreds()) {
originalPreds() = new (CmpCommon::statementHeap()) ValueIdList();
rewrittenPreds() = new (CmpCommon::statementHeap()) ValueIdList();
}
// Remember the pair so the same predicate is not rewritten twice.
originalPreds()->insert(pred);
rewrittenPreds()->insert(newPred->getValueId());
CMPASSERT(originalPreds()->entries() ==
rewrittenPreds()->entries());
}
}
}
}
// Add the derived predicates to the caller's set.
predicates += newPredicates;
}
// UnPackRows::pushdownCoveredExpr() --------------------------------------
// Prunes unPackExpr() down to the entries referenced by the parent's
// required values, by outputExpr or by the selection predicates, derives
// additional SYSKEY predicates via constructNewSyskeyPreds(), and then
// delegates to RelExpr::pushdownCoveredExpr().
//
// Inputs: outputExpr, newExternalInputs, setOfValuesReqdByParent (may be
// NULL), childIndex.
// Inputs/Outputs: predicatesOnParent - extended by
// constructNewSyskeyPreds() and then passed on to the base class.
//
// Side Effects: may remove entries from unPackExpr().
//
void
UnPackRows::pushdownCoveredExpr(const ValueIdSet &outputExpr,
const ValueIdSet &newExternalInputs,
ValueIdSet &predicatesOnParent,
const ValueIdSet *setOfValuesReqdByParent,
Lng32 childIndex
)
{
// Everything the parent may reference: its required values (if given)
// plus the output expressions.
ValueIdSet exprOnParent;
if(setOfValuesReqdByParent)
exprOnParent = *setOfValuesReqdByParent;
exprOnParent += outputExpr ;
// refVal receives the referencing value from referencesTheGivenValue();
// it is not used otherwise.
ValueId refVal;
ValueIdSet outputSet ;
// Prune from the unPackExpr() ValueIdSet, those expressions
// that are not needed above (in setOfValuesReqdByParent) or by
// the selectionPred.
//
for(ValueId unPackCol = unPackExpr().init(); unPackExpr().next(unPackCol);
unPackExpr().advance(unPackCol)) {
if(!exprOnParent.referencesTheGivenValue(unPackCol, refVal) &&
!selectionPred().referencesTheGivenValue(unPackCol, refVal)) {
unPackExpr() -= unPackCol;
}
}
// The packingFactor expr used to be modified here so that it
// used one of the columns used by unPackExpr. This code was
// kind of kludgy, since it required knowledge of the packingFactor
// expression. Since this node is now being used by RowSets,
// this code has been removed.
// Remove all expressions from exprOnParent. They
// can't be pushed down!
//
exprOnParent.clear();
// Add all the values required for the UnPackRows expressions
// to the values required by the parent. These expression
// can't be pushed down either, but attempting to push them
// down causes the child node to provide the values needed.
//
outputSet += unPackExpr();
// NOTE(review): exprOnParent is not read again after this statement; the
// base-class call below receives outputSet and an empty set, so the
// packingFactor() values are not handed to it through this variable.
// Confirm that this is intended.
exprOnParent += packingFactor();
constructNewSyskeyPreds(predicatesOnParent);
ValueIdSet emptySet;
RelExpr::pushdownCoveredExpr(outputSet,
newExternalInputs,
predicatesOnParent,
&emptySet,
childIndex);
} // UnPackRows::pushdownCoveredExpr
// UnPackRows::removeUnPackExprTree() -------------------------------------
// Detach the unbound unpack expression tree from this node.
//
// Inputs: none (other than 'this')
//
// Outputs: ItemExpr * - the tree previously held in unPackExprTree()
// (may be NULL).
//
// Side Effects: unPackExprTree() is NULL afterwards.
//
// Per the original note, this is called by UnPackRows::bindNode(); the
// tree is not needed after the binder.
//
ItemExpr *
UnPackRows::removeUnPackExprTree()
{
  ItemExpr *detachedTree = unPackExprTree();

  unPackExprTree() = (ItemExpr *)NULL;

  return detachedTree;
}
// UnPackRows::removePackingFactorTree() ----------------------------------
// Detach the unbound packing factor expression tree from this node.
//
// Inputs: none (other than 'this')
//
// Outputs: ItemExpr * - the tree previously held in packingFactorTree()
// (may be NULL).
//
// Side Effects: packingFactorTree() is NULL afterwards.
//
// Per the original note, this is called by UnPackRows::bindNode(); the
// tree is not needed after the binder.
//
ItemExpr *
UnPackRows::removePackingFactorTree()
{
  ItemExpr *detachedTree = packingFactorTree();

  packingFactorTree() = (ItemExpr *)NULL;

  return detachedTree;
}
// PhysUnPackRows::~PhysUnPackRows() --------------------------------------
// The destructor.
// The body is empty: nothing is released here.
//
PhysUnPackRows::~PhysUnPackRows()
{
}
// PhysUnPackRows::copyTopNode ----------------------------------------------
// Copy a chain of derived nodes (Calls UnPackRows::copyTopNode).
// Needs to copy all relevant fields.
// Used by the Cascades engine.
//
// Inputs: derivedNode - If Non-NULL this should point to a node
// which is derived from this node. If NULL, then this
// node is the top of the derivation chain and a node must
// be constructed.
//
// Outputs: RelExpr * - A Copy of this node.
//
// If the 'derivedNode is non-NULL, then this method is being called
// from a copyTopNode method on a class derived from this one. If it
// is NULL, then this is the top of the derivation chain and a UnPackRows
// node must be constructed.
//
// In either case, the relevant data members must be copied to 'derivedNode'
// and 'derivedNode' is passed to the copyTopNode method of the class
// below this one in the derivation chain (UnPackRows::copyTopNode() in this
// case).
//
RelExpr *
PhysUnPackRows::copyTopNode(RelExpr *derivedNode, CollHeap *outHeap)
{
  // Reuse the node handed in by a more derived class if there is one;
  // otherwise this is the top of the derivation chain and an empty
  // PhysUnPackRows node is created on outHeap.
  //
  PhysUnPackRows *copy =
    (derivedNode != NULL) ? (PhysUnPackRows *) derivedNode
                          : new (outHeap) PhysUnPackRows();

  // Nothing is copied at this level; the classes lower in the derivation
  // chain copy their own members.
  //
  return UnPackRows::copyTopNode(copy, outHeap);
}
// UnPackRows::transformNode() -------------------------------------------
// Unconditional query transformations such as the transformation of
// a subquery to a semijoin are implemented by the virtual function
// transformNode(). The aim of such transformations is to bring the
// query tree to a canonical form. transformNode() also ensures
// that the "required" (or characteristic) input values are "minimal"
// and the "required" (or characteristic) outputs values are
// "maximal" for each operator.
//
// transformNode() is an overloaded name, which is used for a set
// of methods that implement the transformation phase of query
// normalization.
//
// We use the term query tree for a tree of relational operators,
// each of which can contain none or more scalar expression trees.
// The transformations performed by transformNode() brings scalar
// expressions into a canonical form. The effect of most such
// transformations is local to the scalar expression tree.
// However, the transformation of a subquery requires a semijoin
// to be performed between the relational operator that contains
// the subquery and the query tree for the subquery. The effect
// of such a subquery transformation is therefore visible not
// only in the scalar expression tree but also in the relational
// expression tree.
//
// Parameters:
//
// NormWA & normWARef
// IN : a pointer to the normalizer work area
//
// ExprGroupId & locationOfPointerToMe
// IN : a reference to the location that contains a pointer to
// the RelExpr that is currently being processed.
//
void UnPackRows::transformNode(NormWA &normWARef,
ExprGroupId &locationOfPointerToMe)
{
// The caller must pass the location that currently points at this node.
CMPASSERT( this == locationOfPointerToMe );
// If this node has already been transformed, we are done.
//
if (nodeIsTransformed())
return;
// Make sure that it is only transformed once.
//
markAsTransformed();
// transformNode takes a bound tree and turns it into a transformed
// tree. For a RelExpr that means the following.
// + expressions are transformed. If the expressions contain
// subqueries then new RelExpr are created for them and
// they are usually added above (as an ancestor) of the node
// that contained them.
// + predicates are pulled up from the children and their
// required inputs are modified
// + the required inputs of the node itself are changed
// from being a sufficient set to being a sufficient minimal
// set.
//
// Transform the child.
// Pull up their transformed predicates
// recompute their required inputs.
//
child(0)->transformNode(normWARef, child(0));
// The block below is disabled code that has been kept for reference.
/*
// Prune the unPackExpr early on for performance reasons in transformation
// and normalization so that it contains columns which are referenced in
// the query.
//
// Have at least two unpackcol item there to handle cases like count(*).
//
CMPASSERT(unPackExpr().entries() >= 2);
for(ValueId unPackCol = unPackExpr().init(); unPackExpr().next(unPackCol);
unPackExpr().advance(unPackCol)) {
BaseColumn *bcol =
(BaseColumn *)(unPackCol.getItemExpr()->child(0)->castToItemExpr());
if(!bcol->getNAColumn()->isReferenced()) {
unPackExpr() -= unPackCol;
}
}
ValueIdList newUnPackExpr;
ValueIdList newPackingFactor;
newUnPackExpr.insert(unPackExpr()[0]);
newPackingFactor.insert(packingFactor()[0]);
for(CollIndex i = 1; i < unPackExpr().entries(); i++)
{
if (unPackExpr()[i].getItemExpr()->getOperatorType()
== ITM_ROWSETARRAY_SCAN) {
newUnPackExpr.insert(unPackExpr()[i]);
}
}
unPackExpr().clear();
unPackExpr().insert(newUnPackExpr);
packingFactor().clear();
packingFactor().insert(newPackingFactor);
*/
// Transform the unpack expressions. A TRUE result means child(0) now
// refers to a new child, which has to be transformed as well.
if(unPackExpr().transformNode(normWARef,
child(0),
getGroupAttr()->getCharacteristicInputs())) {
// -----------------------------------------------------------------
// Transform my new child.
// -----------------------------------------------------------------
child(0)->transformNode(normWARef, child(0));
}
// Same treatment for the packing factor expressions.
if(packingFactor().
transformNode(normWARef,
child(0),
getGroupAttr()->getCharacteristicInputs())) {
// -----------------------------------------------------------------
// Transform my new child.
// -----------------------------------------------------------------
child(0)->transformNode(normWARef, child(0));
}
// Since unPackExpr() and packingFactor() refer to indexValue,
// indexValue() may have been replaced with something else...
//
// (Skipped when no index value is set, e.g. for a node built with the
// rowwise-rowset constructor.)
if (indexValue() != NULL_VALUE_ID)
indexValue() =
indexValue().getItemExpr()->getReplacementExpr()->getValueId();
// Pull up the predicates and recompute the required inputs
// of whoever my children are now.
//
pullUpPreds();
// transform the selection predicates
//
transformSelectPred(normWARef, locationOfPointerToMe);
} // UnPackRows::transformNode()
// UnPackRows::rewriteNode() ---------------------------------------------
// rewriteNode() is the virtual function that computes
// the transitive closure for "=" predicates and rewrites value
// expressions.
//
// Parameters:
//
// NormWA & normWARef
// IN : a pointer to the normalizer work area
//
void UnPackRows::rewriteNode(NormWA & normWARef)
{
  // Let the base class rewrite its own expressions first.
  //
  RelExpr::rewriteNode(normWARef);

  // Normalize the expressions owned by this node. The return values
  // are not acted upon.
  //
  (void) unPackExpr().normalizeNode(normWARef);
  (void) packingFactor().normalizeNode(normWARef);

  // unPackExpr() and packingFactor() refer to the index value, so it
  // may have been replaced; pick up its replacement expression.
  //
  if (indexValue() != NULL_VALUE_ID)
    {
      ItemExpr *replacement =
        indexValue().getItemExpr()->getReplacementExpr();
      indexValue() = replacement->getValueId();
    }
} // UnPackRows::rewriteNode()
// UnPackRows::pullUpPreds() --------------------------------------------
// is redefined to disallow the pullup of predicates
// from the operator's child. UnPackRows can not pull up
// any predicates from its child since no of the outputs of the
// child are outputs of the UnPackRows.
//
void UnPackRows::pullUpPreds()
{
// ---------------------------------------------------------------------
// Simply don't pull up child's selection predicates. Still need to tell
// child to recompute its outer references due to the warning below.
// ---------------------------------------------------------------------
child(0)->recomputeOuterReferences();
// ---------------------------------------------------------------------
// WARNING: One rule that this procedure must follow is
// that recomputeOuterReferences() must be called on the children even
// if no predicates are pulled up from them. This is to correct
// the outer references that are added to a right child of a
// semi or outer join when processing subqueries in the ON clause.
// ---------------------------------------------------------------------
}
// UnPackRows::recomputeOuterReferences() --------------------------------
// This method is used by the normalizer for recomputing the
// outer references (external dataflow input values) that are
// still referenced by each operator in the subquery tree
// after the predicate pull up is complete.
//
// Side Effects: sets the characteristicInputs of the groupAttr.
//
void UnPackRows::recomputeOuterReferences()
{
// This is virtual method on RelExpr.
// When this is called it is assumed that the children have already
// been transformed.
// The required inputs of the child are therefore already minimal
// and sufficient.
// It is also assumed that the RelExpr itself has been bound.
// That implies that the group attributes have already been allocated
// and the required inputs is a sufficient (but not necessarilly minimum)
// set of external values needed to evaluate all expressions in this subtree.
//
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
//
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
// The set of valueIds need by this node.
//
ValueIdSet allMyExpr(getSelectionPred());
allMyExpr += unPackExpr();
allMyExpr += packingFactor();
// Remove from outerRefs those valueIds that are not needed
// by all my expressions
//
allMyExpr.weedOutUnreferenced(outerRefs);
// Add to outerRefs those that my children need.
//
outerRefs += child(0).getPtr()->getGroupAttr()->getCharacteristicInputs();
// set my Character Inputs to this new minimal set.
//
getGroupAttr()->setCharacteristicInputs(outerRefs);
} // UnPackRows::recomputeOuterReferences()
// UnPackRows::synthEstLogProp() ------------------------------------------
// synthesize estimated logical properties given a specific set of
// input log. properties.
//
// Parameters:
//
// EstLogPropSharedPtr inputEstLogProp
// IN : A set of input logical properties used to estimate the logical
// properities of this node.
//
void UnPackRows::synthEstLogProp(const EstLogPropSharedPtr& inputEstLogProp)
{
if (getGroupAttr()->isPropSynthesized(inputEstLogProp) == TRUE)
return;
// Create a new Output Log Property
//
// EstLogPropSharedPtr myEstProps (
// new(CmpCommon::statementHeap()) EstLogProp());
// Get the current column stats.
//
// const ColStatDescList &outerColStatsList = inputEstLogProp->getColStats();
// NEED to compute new colstats for this node.
// !!!
// Get the estimated logical properties of the child. To be used
// to estimate the logical properties of this node.
//
EstLogPropSharedPtr childEstProps = child(0).outputLogProp(inputEstLogProp);
CostScalar rowCount =
childEstProps->getResultCardinality() * getMaxPackingFactor();
// Set the cardinality estimate.
//
// myEstProps->setResultCardinality(rowCount);
// UnPackRows node behaves like a scan of the unpacked logical table in
// the sense that it is responsible for picking up histogram statistics
// for the logical rows.
//
EstLogPropSharedPtr myEstProps;
if (unPackedTable())
myEstProps = synthEstLogPropForUnaryLeafOp (inputEstLogProp,
colStats(),
//unPackedTable()->getTableColStats(),
Cardinality(1.));
else
{
myEstProps = EstLogPropSharedPtr(new (STMTHEAP) EstLogProp (rowCount));
// For each characteristics output of this group, we want to create a
// histogram
ValueIdSet reqdOutputs = getGroupAttr()->getCharacteristicOutputs();
ColStatDescList & outputColStats = myEstProps->colStats();
for (ValueId charOutput = reqdOutputs.init();
reqdOutputs.next(charOutput);
reqdOutputs.advance(charOutput) )
{
outputColStats.addColStatDescForVirtualCol(rowCount,
rowCount,
charOutput,
charOutput,
charOutput,
NULL);
}
}
// Set the logical properties of this node.
//
getGroupAttr()->addInputOutputLogProp(inputEstLogProp, myEstProps);
} // UnPackRows::synthEstLogProp
// UnPackRows::synthLogProp ----------------------------------------------
// synthesize logical properties
//
// Synthesizes the logical properties of this node. When an unpacked table
// is present it additionally (1) copies the "referenced" markings from the
// packed table's columns to the unpacked table's columns, (2) fetches the
// unpacked table's column statistics, and (3) associates each
// single-column statistic with the matching entry of unPackExpr(), storing
// the matches in colStats() and removing the rest from the table's list.
void
UnPackRows::synthLogProp(NormWA * normWAPtr)
{
// check to see whether properties are already synthesized.
if (getGroupAttr()->existsLogExprForSynthesis())
return;
// Need to check for cardinality constraints and uniques constraints.
// !!!
RelExpr::synthLogProp(normWAPtr);
if (unPackedTable() == NULL)
{
// Nothing else to do here. This is a rowset since we do not have
// a table.
return;
}
// The columns whose statistics are to be fetched are marked referenced
// in the packed table's NATable. We have to transfer this information
// to the unpacked table's NATable.
//
// unpacked table's NATable object.
const NATable *unPackedNATable = unPackedTable()->getNATable();
// Get the original table name.
QualifiedName tableName = unPackedNATable->getTableName();
// Prepend original name with "PACKED__@" to give the packed table name.
NAString packedTableName( PACKED__, CmpCommon::statementHeap());
packedTableName += tableName.getObjectName();
tableName.setObjectName(packedTableName);
// Need to construct the ExtendedQualName to lookup NATableDB.
ExtendedQualName extendedTableName (tableName);
extendedTableName.setSpecialType(ExtendedQualName::VIRTUAL_TABLE);
// Fetch the packed table's NATable object.
NATable *packedNATable = ActiveSchemaDB()->getNATableDB()->get(&extendedTableName);
CMPASSERT(packedNATable);
const NAColumnArray packedColArray = packedNATable->getNAColumnArray();
const NAColumnArray unPackedColArray = unPackedNATable->getNAColumnArray();
// The columns in packed table are ordered the same way as in unpacked table.
// NOTE(review): the loop is bounded by the packed array only; it relies on
// the unpacked array having at least as many entries -- confirm.
CollIndex i = 0;
for(i = 0; i < packedColArray.entries(); i++)
{
// Mark corresponding columns in the unpacked table referenced for columns
// referenced in the packed table.
//
// Only the first marking that applies is transferred (else-if chain).
if(packedColArray[i]->isReferencedForMultiIntHist())
{
unPackedColArray[i]->setReferencedForMultiIntHist();
}
else if(packedColArray[i]->isReferencedForSingleIntHist())
{
unPackedColArray[i]->setReferencedForSingleIntHist();
}
else if(packedColArray[i]->isReferenced())
{
unPackedColArray[i]->setReferenced();
}
}
// Fetch the colstat for the logical table columns which are referenced.
// Map them to their corresponding UnPackCol expressions which are used
// above me.
//
// This triggers the fetch of histogram statistics which are then stored
// in the logical unpacked table descriptor as well as returned.
//
ColStatDescList &initialStats = unPackedTable()->tableColStats();
// Find out the base columns of all entries in unPackExpr_ and store them
// in an array for later use.
//
// Exactly one entry (possibly NULL) is inserted per unPackExpr() entry, in
// iteration order, so that the list can be indexed in parallel below.
LIST(BaseColumn *) packedBaseColList(STMTHEAP);
for(ValueId unPackCol = unPackExpr().init(); unPackExpr().next(unPackCol);
unPackExpr().advance(unPackCol))
{
ItemExpr *packedItem = unPackCol.getItemExpr()->child(0);
BaseColumn *baseCol;
// Probably the case here since everything has been converted to VEGref.
//
if(packedItem->getOperatorType() != ITM_BASECOLUMN)
{
if(packedItem->getOperatorType() == ITM_VEG_REFERENCE)
{
VEGReference *packedVEGRef = (VEGReference *) packedItem;
ValueIdSet packedVEGset = packedVEGRef->getVEG()->getAllValues();
CMPASSERT(packedVEGset.entries() > 0);
// Locate the base column entry in the VEG.
//
// The first base column found is used.
NABoolean found = FALSE;
for(ValueId colvid = packedVEGset.init();
packedVEGset.next(colvid);
packedVEGset.advance(colvid))
{
if(colvid.getItemExpr()->getOperatorType() == ITM_BASECOLUMN)
{
baseCol = (BaseColumn *)colvid.getItemExpr();
packedBaseColList.insert(baseCol);
found = TRUE;
break;
}
}
// Keep the list aligned even when the VEG has no base column.
if(!found) packedBaseColList.insert(NULL);
}
else
// Left child neither a base column nor a VEGref. Problems...
packedBaseColList.insert(NULL);
}
else packedBaseColList.insert((BaseColumn *)packedItem);
}
// To-remove list to keep track of statistics to be removed from the Col
// StatDescList, which are not actually referenced.
//
ColStatDescList removeList(CmpCommon::statementHeap());
// We need to associate statistics of each logical column with its corr.
// UnPackCol expression, since it is in this form that the logical column
// is known above this UnPackRows node.
//
for (i = 0; i < initialStats.entries(); i++)
{
// Column this statistics is associated with.
//
const NAColumnArray &statCols =
initialStats[i]->getColStats()->getStatColumns();
// Single column histogram only right now.
//
// Multi-column statistics are skipped: they are neither matched nor
// put on the remove list.
if (statCols.entries() != 1) continue;
// Find out which logical column it is.
//
NABoolean found = FALSE;
// j walks packedBaseColList in step with the unPackExpr() iteration.
CollIndex j = 0;
for(ValueId unPackCol = unPackExpr().init();
unPackExpr().next(unPackCol);
unPackExpr().advance(unPackCol), j++)
{
// Columns are matched by name.
if (packedBaseColList[j] &&
packedBaseColList[j]->getColName() == statCols[0]->getColName())
{
// Got it! Associate corresponding unpackcol with that ColStatDesc.
//
found = TRUE;
initialStats[i]->VEGColumn() = unPackCol;
break;
}
}
// Erase entry from my ColStatDesc, since it's not used here.
if (!found) removeList.insert(initialStats[i]);
else colStats().insert(initialStats[i]);
}
// Remove unused statistics.
//
// Done after the scan so that initialStats is not modified while it is
// being indexed.
for (i = 0; i < removeList.entries(); i++)
initialStats.remove(removeList[i]);
} // UnPackRows::synthLogProp()
// PhysUnPackRows::costMethod()
// Hand out the CostMethod object that provides the cost estimation
// functions for nodes of this type.
//
// The CostMethodUnPackRows instance is created on first use from the
// heap returned by GetCliGlobals()->exCollHeap() and is cached in a
// function-local static (declared with THREAD_P), so every later call
// returns the same pointer. This function never frees it.
CostMethod*
PhysUnPackRows::costMethod() const
{
  static THREAD_P CostMethodUnPackRows *cachedMethod = NULL;

  // Fast path: already built by an earlier call.
  if (cachedMethod != NULL)
    return cachedMethod;

  cachedMethod = new (GetCliGlobals()->exCollHeap()) CostMethodUnPackRows();
  return cachedMethod;
} // PhysUnPackRows::costMethod()
//<pb>
//==============================================================================
// Synthesize physical properties for the Unpack operator's current plan
// extracted from a specified context.
//
// Input:
// context -- specified context containing this operator's current plan.
//
// planNumber -- plan's number within the plan workspace. Used optionally for
// synthesizing partitioning functions but unused in this
// derived version of synthPhysicalProperty().
//
// pws -- plan workspace. Unused in this derived version of
// synthPhysicalProperty().
//
// Output:
// none
//
// Return:
// Pointer to this operator's synthesized physical properties.
//
//==============================================================================
PhysicalProperty *
PhysUnPackRows::synthPhysicalProperty(const Context *context,
                                      const Lng32 planNumber,
                                      PlanWorkSpace *pws)
{
  // Start from a statement-heap copy of the physical property of the
  // solution chosen for child 0; for now it is simply propagated.
  const PhysicalProperty* const childSpp =
    context->getPhysicalPropertyOfSolutionForChild(0);
  PhysicalProperty *resultPP =
    new(CmpCommon::statementHeap()) PhysicalProperty(*childSpp);

  // No logical unpacked table means we're a rowset unpack.
  const NABoolean isRowsetUnpack = (unPackedTable() == NULL);

  // The cardinality is compared against the COMP_INT_5 default only for
  // a rowset unpack (the && short-circuits otherwise).
  if (isRowsetUnpack &&
      (getGroupAttr()->getResultCardinalityForEmptyInput() >
       ActiveSchemaDB()->getDefaults().getAsLong(COMP_INT_5)))
    {
      // rowset unpack should run in master because it's inefficient and
      // risky to send rowsets from master to an ESP. An ESP eventually
      // flows the rowset's data back to the master. An ESP parallel plan
      // fragment with a large rowset can exceed IPC size limit and crash
      // at run-time (see genesis case 10-060510-3471).
      resultPP->setLocation(EXECUTE_IN_MASTER);
    }

  return resultPP;
} // PhysUnPackRows::synthPhysicalProperty()
// UnPackRows::bindNode - Bind the UnPackRows node.
// This node has been generated by Scan::bindNode after it determined
// that this table is packed. This node has two item expressions:
//
// unPackExprTree(): This expression contains a list
// of expressions to unpack the SYSKEY and all the user columns
// of the packed table. The SYSKEY is unpacked by 'calculating'
// the SYSKEY value based on an initial value of SYSKEY for the
// packed row and the current value of the index into the packed
// row. The user columns are unpacked using the UnPackCol expression
// based on the value of the index. The index is provided at runtime
// thru a hostVar. The valueId of this HostVar must be captured so
// that it can be used to map a local variable into the workAtp of
// the work procedure.
//
// packingFactorTree(). This expression contains a list
// of expressions to extract the NUMROWS field from each packed
// column. During normalization, this list will be reduced to
// only one. (The value of NUMROWS is the same in each packed column).
//
// Input:
// bindWA -- binder work area; supplies the heap, the current scope and
// the error status checked after each binding step.
//
// Return:
// 'this' if the node was already bound, or if binding the children,
// the unpack expressions or the packing factor set the error status;
// NULL if binding one of the rowwise-rowset expressions set the error
// status; otherwise the result of bindSelf(bindWA).
//
// NOTE(review): the early error returns leave bindWA's name-loc list
// pointer set to NULL (it is only restored on the success path), and
// the error paths return either 'this' or NULL. Both are kept as-is
// because callers are outside this view.
//
RelExpr *UnPackRows::bindNode(BindWA *bindWA)
{
  // If this node has already been bound, we are done.
  //
  if (nodeIsBound())
    return this;

  // Bind the child nodes.
  //
  // Set the bindWA's view name resolution pointer to NULL before binding,
  // since no objects below this level either (1) need to be recorded as
  // dependencies for a view or (2) need to be linked back to their names
  // in the original view text.
  //
  ParNameLocList *saveNameLocList = bindWA->getNameLocListPtr();
  bindWA->setNameLocListPtr(NULL);
  bindChildren(bindWA);
  if (bindWA->errStatus())
    return this;

  // Bind the UnPackExprTree. This expression contains a list
  // of expressions to unpack the SYSKEY and all the user columns
  // of the packed table. The SYSKEY is unpacked by 'calculating'
  // the SYSKEY value based on an initial value of SYSKEY for the
  // packed row and the current value of the index into the packed
  // row. The user columns are unpacked using the UnPackCol expression
  // based on the value of the index. The index is provided at runtime
  // thru a hostVar.
  //
  ValueIdList unPackExprList;
  if(unPackExprTree()) {
    // For rowsets, if the type is external (for instance, decimal or varchar),
    // we must first convert to our internal equivalent type
    if (unPackExprTree()->origOpType() == ITM_ROWSETARRAY_SCAN) {
      const NAType *elemType = ((RowsetArrayScan *) unPackExprTree())->getType();
      if (elemType->isExternalType()) {
        NAType *internalType = elemType->equivalentType();
        unPackExprTree() = new (bindWA->wHeap()) Cast(unPackExprTree(), internalType);
      }
    }

    // Same conversion for every RowsetArrayScan found in an item list.
    // The list is walked by descending through child(0) for as long as
    // that child is itself an ITM_ITEM_LIST node.
    if (unPackExprTree()->origOpType() == ITM_ITEM_LIST) {
      ItemExpr *exp = unPackExprTree();
      while (exp) {
        ItemExpr *elem;
        for (Int32 i = 0; i < exp->getArity(); i++) {
          elem = (ItemExpr *) (exp->getChild(i));
          if (elem->origOpType() == ITM_ROWSETARRAY_SCAN) {
            const NAType *elemType = ((RowsetArrayScan *) elem)->getType();
            if (elemType->isExternalType()) {
              NAType *internalType = elemType->equivalentType();
              exp->child(i) = new (bindWA->wHeap()) Cast(elem, internalType);
            }
          }
        }
        if ( ((ItemExpr *) (exp->getChild(0)))->origOpType() != ITM_ITEM_LIST) {
          break;
        }
        exp = (ItemExpr *) (exp->getChild(0));
      }
    }

    // $$$ Trick bindwa to believe this expression to bind is just one in
    // $$$ RI constraint so that it won't mark columns referenced. Better
    // $$$ invent something new for real.
    //
    bindWA->getCurrentScope()->context()->inRIConstraint() = TRUE;
    removeUnPackExprTree()->convertToValueIdList(unPackExprList,
                                                 bindWA,
                                                 ITM_ITEM_LIST);
    unPackExpr().insertList(unPackExprList);
    bindWA->getCurrentScope()->context()->inRIConstraint() = FALSE;
    if (bindWA->errStatus())
      return this;
  }

  // Bind the packingFactorTree. This expression contains an
  // expression to extract the NUMROWS field from the first packed
  // column of the smallest Access Path.
  //
  if(packingFactorTree()) {
    // $$$ Trick bindwa to believe this expression to bind is just one in
    // $$$ RI constraint so that it won't mark columns referenced. Better
    // $$$ invent something new for real.
    //
    bindWA->getCurrentScope()->context()->inRIConstraint() = TRUE;
    removePackingFactorTree()->convertToValueIdSet(packingFactor(),
                                                   bindWA,
                                                   ITM_ITEM_LIST);
    bindWA->getCurrentScope()->context()->inRIConstraint() = FALSE;
    if (bindWA->errStatus())
      return this;
  }

  // Search for the valueId of the HostVar used for the index of the
  // packed row. We must keep track of this so that we can map
  // a variable local to the work procedure to be this valueid.
  //
  // We should have allocated a ValueId for the host variable at this
  // point, but just in case
  //
  // NOTE(review): removeUnPackExprTree() was already called above;
  // presumably it clears the tree pointer, in which case this test is
  // false whenever the tree was bound here -- confirm against its
  // definition.
  if (unPackExprTree()) {
    if(indexValue_ == NULL_VALUE_ID) {
      ValueIdSet leafValues;
      unPackExprList[0].getItemExpr()->getLeafValueIds(leafValues);
      for(ValueId leaf = leafValues.init();
          leafValues.next(leaf);
          leafValues.advance(leaf)) {
        if(leaf.getItemExpr()->getOperatorType() == ITM_HOSTVAR) {
          indexValue_ = leaf;
        }
      }
    }
    CMPASSERT(indexValue_ != NULL_VALUE_ID);
  }

  // Bind the optional rowwise-rowset expressions (input size, max input
  // row length, buffer address), replacing each with its bound form.
  //
  if (rwrsInputSizeExpr())
    {
      rwrsInputSizeExpr_ = rwrsInputSizeExpr_->bindNode(bindWA);
      if (bindWA->errStatus ())
        return NULL;
    }
  if (rwrsMaxInputRowlenExpr())
    {
      rwrsMaxInputRowlenExpr_ = rwrsMaxInputRowlenExpr_->bindNode(bindWA);
      if (bindWA->errStatus ())
        return NULL;
    }
  if (rwrsBufferAddrExpr())
    {
      rwrsBufferAddrExpr_ = rwrsBufferAddrExpr_->bindNode(bindWA);
      if (bindWA->errStatus ())
        return NULL;
    }

  // Construct the RETDesc for this node.
  //
  RETDesc *resultTable = new(bindWA->wHeap()) RETDesc(bindWA);

  // Add the columns from the child to the RETDesc.
  //
  const RETDesc &childTable = *child(0)->getRETDesc();
  const ColumnDescList *sysColList = childTable.getSystemColumnList();

  // This ASSUMES that the columns in the child's RETDesc are
  // in the same order as the expressions in unPackExpr().
  // This is basically assuming that the child (Scan) produces
  // the RETDesc in the same order as they appear in the NATable.
  // This should probably be fixed by passing a list of ColRefNameObj()
  // to the UnPackRows node.
  //
  CollIndex j = 0;   // next unused entry of unPackExprList
  CollIndex i = 0;
  for(i = 0; i < sysColList->entries(); i++) {
    // Pick the value id exposed for this system column once, so that the
    // same id is used for the RETDesc entry and for sysKeyId_. For a
    // rowwise rowset j is not advanced, so indexing unPackExprList with
    // j-1 would wrap around the unsigned CollIndex.
    ValueId sysColVid = (rowwiseRowset() ? sysColList->at(i)->getValueId()
                                         : unPackExprList[j++]);
    resultTable->addColumn(bindWA,
                           sysColList->at(i)->getColRefNameObj(),
                           sysColVid,
                           SYSTEM_COLUMN,
                           sysColList->at(i)->getHeading());
    // sysKeyId_ is only meaningful when there is exactly one system column.
    if(sysColList->entries() == 1) {
      sysKeyId_ = sysColVid;
    } else {
      sysKeyId_ = NULL_VALUE_ID;
    }
  }

  // if this is rowwise rowset, then child's output values, which are
  // the extracted values from the rowwise rowset buffer, are sent
  // to my parent.
  // The value ids of the params from my child are used in expressions
  // above this node.
  // Get all the values of my children and remember them.
  for(i = 0; i < childTable.getColumnList()->entries(); i++) {
    if (rowwiseRowset())
      rwrsOutputVids_.insert(childTable.getValueId(i));
    resultTable->addColumn(bindWA,
                           childTable.getColRefNameObj(i),
                           (rowwiseRowset() ? childTable.getValueId(i)
                                            : unPackExprList[j++]),
                           USER_COLUMN,
                           childTable.getHeading(i));
  }

  // If the Table Desc for the logical unpacked table is set, map the vid's
  // in there to the real vid's generated by unPackExpr_, assuming they are
  // ordered the same way.
  //
  if(unPackedTable())
    {
      unPackedTable()->clearColumnList();
      unPackedTable()->addToColumnList(unPackExprList);
    }

  // Set the return descriptor
  //
  setRETDesc(resultTable);
  bindWA->getCurrentScope()->setRETDesc(resultTable);

  //
  // Bind the base class.
  //
  // Restore the name resolution pointer
  //
  bindWA->setNameLocListPtr(saveNameLocList);
  return bindSelf(bindWA);
} // UnPackRows::bindNode()