blob: 974c055b71b22bad9f0a3705e89a0b6d932be4b3 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: NormRelExpr.C
* Description: Relational expressions (both physical and logical operators)
* Methods related to the normalizer
*
* Created: 5/17/94
* Language: C++
*
*
*
*
******************************************************************************
*/
#include "Debug.h"
#include "Sqlcomp.h"
#include "GroupAttr.h"
#include "opt.h"
#include "NormWA.h"
#include "AllRelExpr.h"
#include "AllItemExpr.h"
#include "ValueDesc.h"
#include "Triggers.h"
#include "Cost.h"
#include "CostMethod.h"
#include "opt.h"
#include "RelGrby.h"
#include "ItemFunc.h"
#include "ControlDB.h"
#include "Analyzer.h"
#include "MultiJoin.h"
#include "CompException.h"
#include "ExpPCodeOptimizations.h"
#include <math.h>
#include "OptRange.h"
#include "ItemOther.h"
#include "ItemExpr.h"
#include "QRDescGenerator.h"
#include "HBaseClient_JNI.h"
#include "HiveClient_JNI.h"
// Emit the debugging declarations below only if TRANSFORM_DEBUG_DECL has not
// been defined yet; defining it here keeps them from being emitted a second
// time. NOTE(review): the original remark ties this to NSK's OptAll.cpp,
// presumably a build that compiles several sources as one unit -- confirm.
#ifndef TRANSFORM_DEBUG_DECL // artifact of NSK's OptAll.cpp ...
#define TRANSFORM_DEBUG_DECL
DBGDECLDBG( dbg; )
DBGDECL( static NAString unp; )
#endif
//----------------------------------------------------------------------
// static helper function: this is used by Join::transformNode() to decide
// to put the two vids in a VEG; currently this is checked for TSJs.
//----------------------------------------------------------------------
static NABoolean doTwoVidsReferToSameColumn(ValueId &vid, ValueId &vid1)
{
  // Both item expressions are treated as IndexColumns and asked for the
  // NAColumn they stand for.
  NAColumn *firstCol  = ((IndexColumn *) vid.getItemExpr())->getNAColumn();
  NAColumn *secondCol = ((IndexColumn *) vid1.getItemExpr())->getNAColumn();

  // Without two columns there is nothing to compare.
  if (firstCol == NULL || secondCol == NULL)
    return FALSE;

  // The column names must agree ...
  if (firstCol->getColName() != secondCol->getColName())
    return FALSE;

  // ... and so must the table names as reported by getTableName(TRUE).
  if (!(firstCol->getTableName(TRUE) == secondCol->getTableName(TRUE)))
    return FALSE;

  // A match only counts when the first column actually has a table name.
  return (firstCol->getTableName() == NULL) ? FALSE : TRUE;
} // doTwoVidsReferToSameColumn()
//----------------------------------------------------------------------
// static helper function: inspects the VEGRegions of both children of a
// full outer join and, depending on which of them have been merged,
// rewrites the operator into a left join, a right join or an inner join.
//----------------------------------------------------------------------
static void tryToConvertFullOuterJoin(Join *fullOuterJoin, NormWA &normWARef)
{
  // Has the VEGRegion of the left child (subtreeId = 0) been merged?
  NABoolean leftMerged =
    normWARef.locateVEGRegionAndCheckIfMerged(fullOuterJoin, 0 /* left child*/ );

  // Has the VEGRegion of the right child (subtreeId = 1) been merged?
  NABoolean rightMerged =
    normWARef.locateVEGRegionAndCheckIfMerged(fullOuterJoin, 1/* right child*/ );

  // The conversion of a FOJ is disabled in Join::bindNode(), so neither
  // region is expected to be merged here. Remove this assertion once the
  // FOJ-to-inner conversion is supported.
  CMPASSERT(!leftMerged && !rightMerged);

  // Pick the new operator type from the combination of merged regions.
  if (leftMerged && rightMerged)
    fullOuterJoin->setOperatorType(REL_JOIN); // inner Join
  else if (leftMerged)
    fullOuterJoin->setOperatorType(REL_LEFT_JOIN);
  else if (rightMerged)
    fullOuterJoin->setOperatorType(REL_RIGHT_JOIN);

  switch (fullOuterJoin->getOperatorType())
    {
    case REL_FULL_JOIN:
      // Not transformed; nothing to fix up.
      break;

    case REL_RIGHT_JOIN:
      // Hema TBD - need to flip
      break;

    case REL_LEFT_JOIN:
      {
        // The left child region (subtreeId = 0) is merged with the parent.
        // Fold the join predicate region (subtreeId = 2) into the right
        // child region (subtreeId = 1).
        VEGRegion *rightRegion =
          normWARef.locateVEGRegion(fullOuterJoin, 1/* right child*/ );
        VEGRegion *joinPredRegion =
          normWARef.locateVEGRegion(fullOuterJoin, 2/* join predicate */ );
        rightRegion->mergeVEGRegion(joinPredRegion);

        VEGRegion *parentRegion = rightRegion->getParentVEGRegion();
        CMPASSERT(parentRegion); // MUST have a parent
        parentRegion->fixupZonesAfterFullToLeftConversion();

        // The left rows no longer need to be null instantiated.
        fullOuterJoin->nullInstantiatedForRightJoinOutput().clear();
      }
      break;

    case REL_JOIN: // Inner Join
      {
        // TBD - Hema. Need some sort of assert that
        // all the three regions (SubtreeId 0,1 & 2) are merged.
        // The join predicates become ordinary selection predicates.
        fullOuterJoin->selectionPred() += fullOuterJoin->joinPred();
        fullOuterJoin->joinPred().clear();
      }
      break;

    default:
      ABORT("Internal error: tryToConvertFullOuterJoin()");
      break;
    }
}
// ***********************************************************************
// $$$$ RelExpr
// member functions for class RelExpr
// ***********************************************************************
// -----------------------------------------------------------------------
// RelExpr::transformNode()
// -----------------------------------------------------------------------
void RelExpr::transformNode(NormWA & normWARef,
                            ExprGroupId & locationOfPointerToMe)
{
  CMPASSERT( locationOfPointerToMe.getPtr() == this );

  // A node is transformed at most once.
  if (nodeIsTransformed())
    return;
  markAsTransformed();

  // ---------------------------------------------------------------------
  // transformNode() takes a bound tree and turns it into a transformed
  // tree. For a RelExpr that means:
  //  + expressions are transformed. If they contain subqueries, new
  //    RelExprs are created for them, usually as ancestors of the node
  //    that contained them.
  //  + predicates are pulled up from the children and the children's
  //    required inputs are adjusted.
  //  + the required inputs of this node shrink from a sufficient set
  //    to a sufficient minimal set.
  // ---------------------------------------------------------------------
  const Int32 numChildren = getArity();

  for (Int32 childIdx = 0; childIdx < numChildren; ++childIdx)
    {
      ExprGroupId & kid = child(childIdx);

      // Make my input values available to the child before it is
      // transformed.
      kid->getGroupAttr()->addCharacteristicInputs
        (getGroupAttr()->getCharacteristicInputs());

      // After this call a new semiJoin may have become my direct
      // descendant, with my original child below it. Either way the
      // subtree in this child slot is now transformed.
      kid->transformNode(normWARef, kid);
    }

  // Pull up the predicates of whoever my children are now and recompute
  // their required inputs.
  pullUpPreds();

  // Finally transform my own selection predicates.
  transformSelectPred(normWARef, locationOfPointerToMe);
} // RelExpr::transformNode()
// QSTUFF
// ***********************************************************************
// $$$$ RelExpr
// member functions for class RelExpr
// ***********************************************************************
// -----------------------------------------------------------------------
// RelExpr::checkReadWriteConflicts()
// -----------------------------------------------------------------------
RelExpr::rwErrorStatus RelExpr::checkReadWriteConflicts(NormWA & normWARef)
{
  // Default behavior: delegate to the children in order and report the
  // first status that is not RWOKAY.
  const Int32 numChildren = getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; ++childIdx)
    {
      rwErrorStatus childStatus =
        child(childIdx)->checkReadWriteConflicts(normWARef);
      if (childStatus != RWOKAY)
        return childStatus;
    }
  return RWOKAY;
} // RelExpr::checkReadWriteConflicts()
// Build the MapValueIds node to sit on top of a GroupBy.
// This function is used by the SemanticQueryOptimization phase
// to insert a MapValueIds node on top of a GroupBy and a LeftJoin
// when a Join is converted to a LeftJoin during unnesting
// Also update the outputs of the MapValueIds node accordingly.
//------------------------------------------------------------------------------
MapValueIds * GroupByAgg::buildMapValueIdNode(ValueIdMap *map)
{
  // The new node is allocated on the statement heap, with this GroupBy as
  // its child and the caller's mapping.
  CollHeap *heap = CmpCommon::statementHeap();
  MapValueIds *result = new (heap) MapValueIds(this, *map);

  // It needs the same characteristic inputs as its child (this node).
  result->getGroupAttr()->addCharacteristicInputs(
       getGroupAttr()->getCharacteristicInputs());

  result->primeGroupAttributes();
  return result;
} // GroupByAgg::buildMapValueIdNode()
// -----------------------------------------------------------------------
// RelRoot::checkReadWriteConflicts()
// -----------------------------------------------------------------------
RelExpr::rwErrorStatus RelRoot::checkReadWriteConflicts(NormWA & normWARef)
{
  // A true root without an embedded update or delete needs no checking.
  if (!(getGroupAttr()->isEmbeddedUpdateOrDelete()) && isTrueRoot())
    return RWOKAY;

  // Otherwise check every child and report the first conflict found.
  const Int32 numChildren = getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; ++childIdx)
    {
      rwErrorStatus childStatus =
        child(childIdx)->checkReadWriteConflicts(normWARef);
      if (childStatus != RWOKAY)
        return childStatus;
    }
  return RWOKAY;
} // RelRoot::checkReadWriteConflicts()
// -----------------------------------------------------------------------
// Scan::checkReadWriteConflicts()
// -----------------------------------------------------------------------
RelExpr::rwErrorStatus Scan::checkReadWriteConflicts(NormWA & normWARef)
{
  // The scanned table is identified by the qualified file set name of its
  // clustering index.
  NAString scannedFile(
       getTableDesc()->getNATable()->
         getClusteringIndex()->getFileSetName().getQualifiedNameAsString(),
       CmpCommon::statementHeap());

  // Reading a file that is already on the write list is a conflict:
  // raise error 4152 naming the table.
  for (CollIndex w = 0; w < normWARef.getWriteList().entries(); w++)
    {
      if (strcmp(normWARef.getWriteList()[w], scannedFile) != 0)
        continue;

      *CmpCommon::diags() << DgSqlCode(-4152)
        << DgTableName(getTableDesc()->getNATable()->getTableName().getQualifiedNameAsAnsiString());
      return RWERROR;
    }

  // Already recorded as read: nothing more to do.
  for (CollIndex r = 0; r < normWARef.getReadList().entries(); r++)
    {
      if (strcmp(normWARef.getReadList()[r], scannedFile) == 0)
        return RWOKAY;
    }

  // First read of this file: remember it.
  normWARef.getReadList().insert(scannedFile);
  return RWOKAY;
} // Scan::checkReadWriteConflicts()
// -----------------------------------------------------------------------
// GenericUpdate::checkReadWriteConflicts()
// -----------------------------------------------------------------------
RelExpr::rwErrorStatus GenericUpdate::checkReadWriteConflicts(NormWA & normWARef)
{
  // ---------------------------------------------------------------------
  // Checks whether the same table is both read from and updated in the
  // same query. This runs after binding and transformation so that all
  // inlining of operations and the removal of structural nodes have
  // already happened.
  // ---------------------------------------------------------------------

  // The updated table is identified by the qualified file set name of its
  // clustering index.
  NAString updatedFile(
       getTableDesc()->getNATable()->
         getClusteringIndex()->getFileSetName().getQualifiedNameAsString(),
       CmpCommon::statementHeap());

  // Writing a file that is already on the read list is a conflict:
  // raise error 4152 naming the table.
  for (CollIndex r = 0; r < normWARef.getReadList().entries(); r++)
    {
      if (strcmp(normWARef.getReadList()[r], updatedFile) != 0)
        continue;

      *CmpCommon::diags() << DgSqlCode(-4152)
        << DgTableName(getTableDesc()->getNATable()->getTableName().getQualifiedNameAsAnsiString());
      return RWERROR;
    }

  // Already recorded as written: nothing more to do.
  for (CollIndex w = 0; w < normWARef.getWriteList().entries(); w++)
    {
      if (strcmp(normWARef.getWriteList()[w], updatedFile) == 0)
        return RWOKAY;
    }

  // First write to this file: remember it.
  normWARef.getWriteList().insert(updatedFile);
  return RWOKAY;
} // GenericUpdate::checkReadWriteConflicts()
// QSTUFF
// -----------------------------------------------------------------------
// Could/should be a ValueIdSet:: method.
// NEED A WAY TO GUARANTEE THAT THIS SET REPRESENTS AN *AND*ed LOGICAL SET,
// not an ITM_ITEM_LIST or other backboned set.
// This guarantee is true for the two RelExpr::transformSelectPred() callers
// in this file.
// -----------------------------------------------------------------------
static void applyTruthTable(ValueIdSet & vs)
{
  // vs is taken to be an ANDed set of items, i1 AND i2 AND ... :
  //   - a simple FALSE decides the whole conjunction:
  //       il..AND FALSE AND..ir  =>  FALSE
  //   - a simple TRUE contributes nothing and is dropped:
  //       il..AND TRUE AND..ir   =>  il AND ir
  for (ValueId pred = vs.init(); vs.next(pred); vs.advance(pred))
    {
      const OperatorTypeEnum predOp = pred.getItemExpr()->getOperatorType();

      if (predOp == ITM_RETURN_FALSE)
        {
          // Keep the FALSE item alone and stop scanning.
          vs.clear();
          vs += pred;
          break;
        }

      if (predOp == ITM_RETURN_TRUE)
        vs -= pred;
    }
}
// Breadth First Traversal to print the transformed and source tree.
// Prints (to cout) every node that lies exactly 'depth' levels below
// ptrToTree, following child(0) and child(1) only. Calling it with
// depth = 0, 1, 2, ... until it returns 0 yields a level-by-level
// (breadth first) listing of the tree.
//
//   ptrToTree - subtree to descend into; NULL is tolerated (returns 0)
//   parent    - parent of ptrToTree, printed next to each node; may be NULL
//               (e.g. for the root), in which case the parent part of the
//               line is omitted instead of dereferencing a null pointer
//   depth     - number of levels still to descend before printing
//   l1        - position label: 0 = "root", 1 = "left child",
//               2 = "right child"; any other value prints no label
//
// Returns 1 if at least one node was printed at the requested level,
// 0 otherwise.
Int32 printTree(ItemExpr *ptrToTree,ItemExpr *parent, Int32 depth, Int32 l1)
{
  if (ptrToTree == NULL)
    return 0;

  if (depth == 0)
    {
      if (l1 == 0)
        cout << "root ";
      if (l1 == 1)
        cout << "left child ";
      if (l1 == 2)
        cout << "right child ";

      cout << " ValueId: " << ptrToTree->getValueId()
           << " Value:" << ptrToTree->getText();

      // The root of a tree has no parent; only print parent details when
      // there is one.
      if (parent != NULL)
        cout << " Parent ValueId: " << parent->getValueId()
             << " Parent Value:" << parent->getText();

      cout << endl;
      return 1;
    }

  // Both subtrees are always visited (no short circuit) so that every node
  // of the requested level gets printed.
  Int32 left  = printTree(ptrToTree->child(0), ptrToTree, depth - 1, 1);
  Int32 right = printTree(ptrToTree->child(1), ptrToTree, depth - 1, 2);
  return left || right;
}
// Rewrites "x <> y" as "(x < y) OR (x > y)". Returns the new OR expression
// (after synthTypeAndValueId() has been called on it), or NULL when
// itemNotEqual is not an ITM_NOT_EQUAL expression.
static ItemExpr* transformUnSupportedNotEqualTo(CollHeap *heap, ItemExpr* itemNotEqual)
{
  if (itemNotEqual->getOperatorType() != ITM_NOT_EQUAL)
    return NULL;

  ItemExpr *lessThan =
    new (heap) BiRelat(ITM_LESS, itemNotEqual->child(0), itemNotEqual->child(1));
  ItemExpr *greaterThan =
    new (heap) BiRelat(ITM_GREATER, itemNotEqual->child(0), itemNotEqual->child(1));

  ItemExpr *eitherSide = new (heap) BiLogic(ITM_OR, lessThan, greaterThan);
  eitherSide->synthTypeAndValueId();
  return eitherSide;
}
// shrinkNewTree(argv[]):
// This function in turn calls union() and intersection() function on RangeSpec object, through
// the wrapper object RangeSpecRef->getRangeObject() to club the values together.
// getRangeObject() call gives RangeSpec object.
// This gets called while performing
// (1) an "OR" operation between an RangeSpecRef ItemExpression
// (operator type =ITM_RANGE_SPEC_FUNC) and an OR'ed Set (operator type =ITM_OR)
// (2) an "AND" operation between a RangeSpecRef ItemExpression
// (operator type =ITM_RANGE_SPEC_FUNC) and an AND'ed Set (operator type =ITM_AND)
// (3) an "AND/OR" operation between an RangeSpecRef ItemExpression
// (operator type =ITM_RANGE_SPEC_FUNC) and an RangeSpecRef ItemExpression (operator type =ITM_RANGE_SPEC_FUNC)
//
// Simple usage example: where a = 10 or b = 20 or a=30;
//
// Step 1:
// Or'ed Set = (RangeSpecRef(a=10),RangeSpecRef(b=20))
// RangeSpecRef = (a=30)
// Step 2:
// Or'ed Set = (RangeSpecRef(a=10,a=30),RangeSpecRef(b=20))
// Step 3: returns true;
//
//
// argv[] ={ OperatorTypeEnum op, ItemExpr *ptrToNewTree, RangeSpecRef* xref}
// argv[0] -> needed since the choice between union and intersection on a RangeSpec object is determined by this parameter:
// -> { ITM_OR, ITM_AND }
// argv[1] -> { OR'ed set, AND'ed Set} where OR'ed set = { RangeSpecRef(a), RangeSpecRef(b), ..}
// OR'ed set and AND'ed set can only exist if it is more than one column, the name "set" represents that
// argv[2] -> xref = Xternal, which is the RangeSpecRef object needs to be merged
// argv[3] -> normWARef, the Normalizer work area, passed on to getRangeItemExpr()
// so the range ItemExpr can be normalized.
// shrinkNewTree() returns (true/false) -> Boolean instead of void is needed to optimize the traversal of the Tree.
NABoolean Scan::shrinkNewTree(OperatorTypeEnum op, ItemExpr *ptrToNewTree,
                              RangeSpecRef* xref, NormWA& normWARef)
{
  if (ptrToNewTree->getOperatorType() != ITM_RANGE_SPEC_FUNC)
    {
      // Interior node. Only a backbone built from the same operator as 'op'
      // is searched: while merging under an OR we do not descend into an
      // AND'ed set (and vice versa), since the two kinds of set are
      // disjoint. Example for op = ITM_OR, merging RangeSpecRef(a = 4):
      //
      //     OR( OR( RangeSpecRef(a in {1,2}),
      //             AND( RangeSpecRef(b = 1), RangeSpecRef(c = 3) ) ),
      //         RangeSpecRef(a = 4) )
      //
      // The inner OR is searched and its first leaf absorbs a = 4; the AND
      // subtree is a "cut" that is never entered. Likewise the ItemExpr
      // reconstructed below a RangeSpecRef leaf (its child 1) is never
      // traversed.
      if (op != ptrToNewTree->getOperatorType())
        return false;

      // Try the left subtree first; the right one is only visited when the
      // left one produced no match.
      NABoolean merged =
        shrinkNewTree(op, ptrToNewTree->child(0), xref, normWARef);
      if (!merged)
        merged = shrinkNewTree(op, ptrToNewTree->child(1), xref, normWARef);
      return merged;
    }

  // Leaf of the new tree: a RangeSpecRef (ITM_RANGE_SPEC_FUNC).
  RangeSpecRef *leaf = (RangeSpecRef *)ptrToNewTree;
  OptNormRangeSpec *leafRange = leaf->getRangeObject();
  CMPASSERT(leafRange != NULL);

  // Only OR and AND know how to combine two ranges.
  if (op != ITM_OR && op != ITM_AND)
    return false;

  // The ranges can only be combined when both describe the same
  // column/expression.
  if (!(leafRange->getRangeExpr()->getValueId() ==
        xref->getRangeObject()->getRangeExpr()->getValueId()))
    return false;

  if (op == ITM_OR)
    leafRange->unionRange(xref->getRangeObject());
  else
    leafRange->intersectRange(xref->getRangeObject());

  // Refresh the ItemExpr form of the (now combined) range.
  leaf->setChild(1, const_cast<ItemExpr*>
                      (leafRange->getRangeItemExpr(&normWARef)));
  return true;
}
// Per-node progress markers for the explicit traversal stack used by
// Scan::applyAssociativityAndCommutativity():
//   AVR_STATE0 - the node has not been expanded yet (also written back when
//                an AND/OR node has been fully processed)
//   AVR_STATE1 - child(0) has been pushed; on return its result is in newNode
//   AVR_STATE2 - child(1) has been pushed; on return the two results are
//                combined
#define AVR_STATE0 0
#define AVR_STATE1 1
#define AVR_STATE2 2
// -----------------------------------------------------------------------
// Scan::applyAssociativityAndCommutativity()
// Walks the AND/OR tree rooted at origPtrToOldTree depth first (without
// recursion) and builds a new tree in which leaf predicates are replaced by
// RangeSpecRef nodes wherever OptRangeSpec::createRangeSpec() can produce a
// range, merging ranges on the same column/expression via shrinkNewTree().
//
//   descGenerator        - passed through to OptRangeSpec::createRangeSpec()
//   heap                 - heap for the work arrays and all new nodes
//   origPtrToOldTree     - root of the predicate tree to convert
//   normWARef            - normalizer work area, used to normalize the
//                          generated range expressions
//   transformationStatus - set to TRUE when at least one RangeSpecRef is
//                          created; never reset to FALSE by this method
//
// Returns the root of the new tree. origPtrToOldTree is returned unchanged
// when the SUBSTRING_TRANSFORMATION default is anything other than DF_OFF.
// -----------------------------------------------------------------------
ItemExpr * Scan::applyAssociativityAndCommutativity(
QRDescGenerator *descGenerator,
CollHeap *heap,
ItemExpr *origPtrToOldTree,
NormWA& normWARef,
NABoolean& transformationStatus)
{
// NOTE(review): the conversion is skipped entirely unless
// SUBSTRING_TRANSFORMATION is DF_OFF; the reason is not visible here.
if( CmpCommon::getDefault(SUBSTRING_TRANSFORMATION) != DF_OFF )
return origPtrToOldTree;
ItemExpr * newLeftNode = NULL ;
ItemExpr * newRightNode = NULL ;
ItemExpr * newNode = NULL ;
ItemExpr * ptrToOldTree = NULL ;
//
// applyAssociativityAndCommutativity() used to be called recursively not just
// for all the items in an expression but for all the items in the node
// tree for an entire query. Consequently, we must eliminate the recursive
// calls to applyAssociativityAndCommutativity() by keeping the
// information needed by each "recursive" level in the HEAP and using
// a "while" loop to look at each node in the tree in the same order as
// the old recursive technique would have done.
// The information needed by each "recursive" level is basically just
// * a pointer to what node (ItemExpr *) to look at next,
// * a "state" value that tells us where we are in the
// applyAssociativityAndCommutativity() code for the ItemExpr node
// that we are currently working on, and
// * a pointer to the new left node (from the "recursive" call on child(0))
// which we need to have available *after* recursing down the child(1) tree.
// NOTE: We don't have to keep the ptr to the new right node in a similar
// fashion because the code does not assign to 'newRightNode' until *after*
// all recursion is finished.
//
ARRAY( ItemExpr * ) IEarray(heap, 10) ; //Initially 10 elements (no particular reason to choose 10)
ARRAY( Int16 ) state(heap, 10) ; //These ARRAYs will grow automatically as needed.)
ARRAY( ItemExpr *) leftNodeArray(heap, 10);
// currIdx is the current depth of the simulated recursion; entry currIdx of
// each array belongs to the node being worked on.
Int32 currIdx = 0;
IEarray.insertAt( currIdx, origPtrToOldTree );
state.insertAt( currIdx, AVR_STATE0 );
// if(ptrToOldTree->getOperatorType() == ITM_NOT_EQUAL)
// ptrToOldTree = transformUnSupportedNotEqualTo(heap,ptrToOldTree);
// The loop ends when the root entry (index 0) has been popped.
while ( currIdx >= 0 )
{
ptrToOldTree = IEarray[currIdx] ;
// Convert the expression to a rangespec immediately under any of the following
// conditions:
// 1) The expression is a leaf predicate (not an AND or OR).
// 2) The expressions is rooted by an OR node that is derived from an in-list.
// This is guaranteed to be an OR backbone of conditions on the same
// column/expr, and can be handled by createRangeSpec() without the overhead
// of recursing through applyAssociativityAndCommutativity(), which incurs
// a massive usage of memory for a large in-list. See bug #3248.
// 3) The expression has already undergone rangespec conversion.
if((ptrToOldTree->getOperatorType() != ITM_AND &&
ptrToOldTree->getOperatorType() != ITM_OR)
||
(ptrToOldTree->getOperatorType() == ITM_OR &&
static_cast<BiLogic*>(ptrToOldTree)->createdFromINlist())
||
ptrToOldTree->isRangespecItemExpr())
{
OptNormRangeSpec* range = static_cast<OptNormRangeSpec*>(
OptRangeSpec::createRangeSpec(descGenerator,
ptrToOldTree,
heap,
TRUE));
// Transforms all Birel ItemExpression into RangeSpecRef ItemExpression
if( range != NULL)
{
RangeSpecRef *refrange = new (heap)
RangeSpecRef(ITM_RANGE_SPEC_FUNC,
range,
range->getRangeExpr(),
range->getRangeItemExpr(&normWARef));
transformationStatus = TRUE;
// Ensure that base column value ids are replaced by vegrefs (Bugzilla 2808).
refrange->getReplacementExpr()->normalizeNode(normWARef);
newNode = refrange ;
}
else
// No range could be built: the original expression is kept as is.
newNode = ptrToOldTree ;
}
else
{
// Recurse through for ITM_AND/ITM_OR
// depth first traversal
if ( state[currIdx] == AVR_STATE0 )
{
state.insertAt( currIdx, AVR_STATE1 ) ;
currIdx++ ; //"Recurse" down to child 0
state.insertAt( currIdx, AVR_STATE0 ) ; // and start that child's state at 0
IEarray.insertAt( currIdx, ptrToOldTree->child(0) ) ;
continue ;
}
else if ( state[currIdx] == AVR_STATE1 )
{
leftNodeArray.insertAt( currIdx, newNode ); //Save the "return value" from recursion
state.insertAt( currIdx, AVR_STATE2 ) ;
currIdx++ ; //"Recurse" down to child 1
state.insertAt( currIdx, AVR_STATE0 ) ; // and start that child's state at 0
IEarray.insertAt( currIdx, ptrToOldTree->child(1) ) ;
continue ;
}
else
{
newLeftNode = leftNodeArray[currIdx] ; //Restore 'newLeftNode'
state.insertAt( currIdx, AVR_STATE0 ) ; //Mark us as done with this IE
newRightNode = newNode ; // Set newRightNode = "return value" from recursion
}
// Both children have been converted; combine them according to the operator
// (ITM_AND or ITM_OR) of the node in the old tree.
// Case 1: both sides are single range specs.
if ((newLeftNode->getOperatorType() == ITM_RANGE_SPEC_FUNC) &&
(newRightNode->getOperatorType() == ITM_RANGE_SPEC_FUNC))
{
// where a = 10 or b =20
// where a = 10 or a =20
if(shrinkNewTree(ptrToOldTree->getOperatorType(),
newLeftNode, (RangeSpecRef *)newRightNode, normWARef))
{
// Same column/expr: the right range was merged into the left one.
newNode = (ItemExpr *)newLeftNode;
}
else
{
// where a = 10 or b =20
newNode = new (heap) BiLogic(ptrToOldTree->getOperatorType(),
(RangeSpecRef *)newLeftNode,
(RangeSpecRef *)newRightNode);
}
}
// Case 2: the left side is a set built with the same operator, the right
// side is a single range spec.
else if((newLeftNode->getOperatorType() == ptrToOldTree->getOperatorType())
&& (newRightNode->getOperatorType() == ITM_RANGE_SPEC_FUNC))
{
// where a = 10 or b =20 or a =30
// ored set = ((a=10),(b=20))
// we are merging the ored set with rangespec (a=30)
// if shrinkNewTree() returns true then intervals are already merged in shrinkNewTree(),
// since matching columns are
// found in the ored set.
// else we add the rangespec into ored set.
if(!shrinkNewTree(ptrToOldTree->getOperatorType(),
newLeftNode,(RangeSpecRef *)newRightNode,normWARef))
{
newNode = new (heap) BiLogic(ptrToOldTree->getOperatorType(),
newLeftNode,
(RangeSpecRef *)newRightNode);
}
else
newNode = (ItemExpr *)newLeftNode;
}
// Case 3: both sides are sets built with the same operator.
// This condition is redundant, not able to formulate any query for this
// we can't generate a tree of the form Or(OrSet, OrSet).
else if((newLeftNode->getOperatorType() ==
ptrToOldTree->getOperatorType()) &&
(newRightNode->getOperatorType() ==
ptrToOldTree->getOperatorType()))
{
newNode = new (heap) BiLogic(ptrToOldTree->getOperatorType(),
newLeftNode,newRightNode);
}
// Case 4: mirror image of case 2 - single range spec on the left, set
// built with the same operator on the right.
else if ((newLeftNode->getOperatorType() == ITM_RANGE_SPEC_FUNC)
&& (newRightNode->getOperatorType() == ptrToOldTree->getOperatorType()))
{
// where a = 10 or b =20 or a =30
// ored set = ((a=10),(b=20))
// we are merging the ored set with rangespec (a=30)
// if shrinkNewTree() returns true then intervals are already merged in shrinkNewTree(),
// since matching columns are
// found in the ored set.
// else we add the rangespec into ored set.
if(!shrinkNewTree(ptrToOldTree->getOperatorType(),
newRightNode,(RangeSpecRef *)newLeftNode,normWARef))
{
newNode = new (heap) BiLogic(ptrToOldTree->getOperatorType(),
(RangeSpecRef *)newLeftNode,
newRightNode);
}
else
newNode = (ItemExpr *)newRightNode;
}
// Case 5: anything else (e.g. an AND'ed set below an OR): just rebuild the
// node over the two converted children.
else
{
newNode = new (heap) BiLogic(ptrToOldTree->getOperatorType(),
newLeftNode,newRightNode);
}
// If user had specified selectivity for original predicate,
// then apply the same to the new predicate as well.
if(ptrToOldTree->isSelectivitySetUsingHint())
{
if(newNode->getOperatorType() == ITM_RANGE_SPEC_FUNC)
{
// For a RangeSpecRef the hint is placed on child(1) of the node.
newNode->child(1)->setSelectivitySetUsingHint();
newNode->child(1)->setSelectivityFactor(ptrToOldTree->getSelectivityFactor());
}
else
{
newNode->setSelectivitySetUsingHint();
newNode->setSelectivityFactor(ptrToOldTree->getSelectivityFactor());
}
}
// NOTE(review): newNode has already been dereferenced above, so this
// assertion cannot catch a NULL produced by the combination step.
CMPASSERT(newNode != NULL);
}
if ( state[currIdx] == AVR_STATE0 ) // if done with current ItemExpr
currIdx-- ; // then return to parent
}
return newNode;
}
// -----------------------------------------------------------------------
// RelExpr::transformSelectPred()
// Do the common steps in processing selection predicates
// -----------------------------------------------------------------------
// Transforms the selection predicates of this node in four steps:
// (1) prime the group attributes and split the subquery / isolated
// UDFunction predicates off the selection predicates,
// (2) transform the remaining predicates, then the split-off ones, and
// put the results back together,
// (3) if another node has taken this node's place below the parent,
// transform that node as well,
// (4) apply constant folding to the resulting selection predicates.
//
// normWARef - normalizer work area
// locationOfPointerToMe - the parent's pointer to this node; after the call
// it may refer to a different node
void RelExpr::transformSelectPred(NormWA & normWARef,
ExprGroupId & locationOfPointerToMe)
{
// ---------------------------------------------------------------------
// This is a common procedure for relExprs to process the subquery
// or Isolated UDFunction predicates in its select list. It sets up the
// required inputs and outputs of the RelExpr before adding new Join
// nodes above it. The subquery/UDFunction transformation logic needs the
// required inputs of the node to properly reflect the final
// (transformed) inputs and outputs.
// ---------------------------------------------------------------------
ValueIdSet subqueryOrIsolatedUDFunctionPredicates;
const NABoolean movePredicates = TRUE;
const NABoolean postJoinPredicates = TRUE;
// ---------------------------------------------------------------------
// Compute the potential inputs and outputs for the node before
// transforming the selectionPred so that the characteristic outputs
// are correct (they are needed by the subquery transformation).
// ---------------------------------------------------------------------
primeGroupAttributes();
// Move the subquery / isolated UDFunction predicates out of the selection
// predicates into their own set.
selectionPred().removeSubqueryOrIsolatedUDFunctionPredicates (
subqueryOrIsolatedUDFunctionPredicates);
// ---------------------------------------------------------------------
// Save the original inputs to use when the subquery predicates get
// transformed.
// ---------------------------------------------------------------------
ValueIdSet externalInputs = getGroupAttr()->getCharacteristicInputs();
// ---------------------------------------------------------------------
// Transform the remaining selection predicates.
// ---------------------------------------------------------------------
if (selectionPred().transformNode(normWARef, locationOfPointerToMe,
externalInputs, movePredicates,
postJoinPredicates))
{
// -----------------------------------------------------------------
// No subqueries should have been left here.
// -----------------------------------------------------------------
CMPASSERT(0);
}
// Drop TRUE items / collapse to FALSE in the ANDed predicate set.
applyTruthTable(selectionPred());
// ---------------------------------------------------------------------
// Transform the subquery predicates.
// ---------------------------------------------------------------------
// semiJoin's that are added should be added between me and my parent.
if (subqueryOrIsolatedUDFunctionPredicates.transformNode(normWARef,
locationOfPointerToMe,
externalInputs, movePredicates,
postJoinPredicates))
{
// -----------------------------------------------------------------
// The transformed subquery predicate requires values that are
// produced by the semiJoin above me.
// The transformed predicate was moved there.
// (Nothing to do here; the branch only documents the case.)
// -----------------------------------------------------------------
}
applyTruthTable(subqueryOrIsolatedUDFunctionPredicates);
// ---------------------------------------------------------------------
// Add the transformed subquery predicates back to the selection predicates.
// Some subquery predicates transform into regular predicates
// e.g. EXISTS (SELECT MAX(t.a) FROM t) ==> TRUE
// ---------------------------------------------------------------------
selectionPred() += subqueryOrIsolatedUDFunctionPredicates;
// ---------------------------------------------------------------------
// If I am no longer the direct descendant of my parent then transform
// the usurper. During its transformation it may get a taste of its
// own medicine and stop becoming the direct descendant of my parent.
// ---------------------------------------------------------------------
if (locationOfPointerToMe != (const RelExpr *)this)
{
locationOfPointerToMe->transformNode(normWARef,
locationOfPointerToMe);
// ---------------------------------------------------------------------
// If this whole subquery is under an OR or inside a complicated expr,
// this flag has been set while the expr is transformed. This is done
// so that when the new join introduced and its right child are being
// transformed, we won't incorrectly use the selection predicates in
// the subquery to convert left join elsewhere into inner join. This
// has been achieved now, so resetting the flag.
// ---------------------------------------------------------------------
if (normWARef.subqUnderExprTree())
normWARef.restoreSubqUnderExprTreeFlag();
// We are on our way back from a number of transformNode()s.
// Let's just make sure that the final usurper got transformed
CMPASSERT( locationOfPointerToMe->nodeIsTransformed());
}
// If there is a selection predicate, we check to see if there are
// constant expressions in it, and we compute them, i.e. this is
// Constant Folding
ValueIdList foldPreds;
foldPreds = getSelectionPred();
if (!foldPreds.isEmpty()) {
NABoolean allTrue = foldPreds.constantFolding();
// The folded list replaces the selection predicates only when it is still
// non-empty after folding; if constantFolding() reports that everything
// is TRUE, the predicates are replaced by an empty list.
if (!foldPreds.isEmpty()) {
// selPredTree() is expected to be NULL at this point.
CMPASSERT(selPredTree() == NULL);
if (allTrue) foldPreds.clear();
setSelectionPredicates(foldPreds);
}
}
} // RelExpr::transformSelectPred()
// -----------------------------------------------------------------------
// RelExpr::pullUpPreds()
// Most operators transmit predicates to their parents as-is.
// -----------------------------------------------------------------------
void RelExpr::pullUpPreds()
{
// ---------------------------------------------------------------------
// This method is called on a RelExpr so that it can gather the
// predicates of its immediate children unto itself.
// It is a virtual function.
// PullUpPreds gets from all the children the predicates they
// can surrender is adds them to the local selectionPred()
// ---------------------------------------------------------------------
Int32 arity = getArity();
for (Int32 i = 0; i < arity; i++)
{
selectionPred() += child(i)->getSelectionPred();
child(i)->selectionPred().clear();
child(i)->recomputeOuterReferences();
};
// ---------------------------------------------------------------------
// WARNING: One rule that this procedure must follow is
// that recomputeOuterReferences() must be called on the children even
// if no predicates are pulled up from them. This is to correct
// the outer references that are added to a right child of a
// semi or outer join when processing subqueries in the ON clause.
// ---------------------------------------------------------------------
} // RelExpr::pullUpPreds()
// -----------------------------------------------------------------------
// RelExpr::recomputeOuterReferences()
// -----------------------------------------------------------------------
void RelExpr::recomputeOuterReferences()
{
// ---------------------------------------------------------------------
// This is virtual method on RelExpr.
// When this is called it is assumed that the children have already
// been transformed.
// The required inputs of the child are therefore already minimal
// and sufficient.
// It is also assumed that the RelExpr itself has been bound.
// That implies that the group attributes have already been allocated
// and the required inputs is a sufficient (but not necessarilly minimum)
// set of external values needed to evaluate all expressions in this subtree.
// ---------------------------------------------------------------------
// ---------------------------------------------------------------------
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
// ---------------------------------------------------------------------
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
// Remove from outerRefs those valueIds that are not needed
// by my selection predicate
selectionPred().weedOutUnreferenced(outerRefs);
// Add to outerRefs those that my children need.
Int32 arity = getArity();
for (Int32 i = 0; i < arity; i++)
{
outerRefs += child(i).getPtr()->getGroupAttr()->getCharacteristicInputs();
}
// set my Character Inputs to this new minimal set.
getGroupAttr()->setCharacteristicInputs(outerRefs);
} // RelExpr::recomputeOuterReferences()
// -----------------------------------------------------------------------
// RelExpr::rewriteNode()
// -----------------------------------------------------------------------
void RelExpr::rewriteNode(NormWA & normWARef)
{
  // ---------------------------------------------------------------------
  // Step 1: rewrite the expressions of every child subtree.
  // ---------------------------------------------------------------------
  const Int32 numChildren = getArity();
  Int32 childIx = 0;
  while (childIx < numChildren)
    {
      child(childIx)->rewriteNode(normWARef);
      ++childIx;
    }

  // ---------------------------------------------------------------------
  // Step 2: rewrite the expressions in the selection predicates.
  // The value returned by normalizeNode() is not acted upon here.
  // ---------------------------------------------------------------------
  (void) selectionPred().normalizeNode(normWARef);

  // ++MV
  // Same treatment for the unique columns; result not acted upon either.
  (void) getUniqueColumns().normalizeNode(normWARef);
  // --MV

  // ---------------------------------------------------------------------
  // Step 3: rewrite the expressions kept in the Group Attributes.
  // ---------------------------------------------------------------------
  getGroupAttr()->normalizeInputsAndOutputs(normWARef);
} // RelExpr::rewriteNode()
// -----------------------------------------------------------------------
// RelExpr::normalizeNode()
// -----------------------------------------------------------------------
RelExpr * RelExpr::normalizeNode(NormWA & normWARef)
{
  // A node is normalized at most once; later calls simply return it.
  if (NOT nodeIsNormalized())
    {
      markAsNormalized();
      const Int32 numChildren = getArity();

      // -----------------------------------------------------------------
      // Top-down step: determine which expressions can be evaluated by
      // my children and adjust the Group Attributes of the children
      // that inherit some of them.
      // -----------------------------------------------------------------
      pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
                          getGroupAttr()->getCharacteristicInputs(),
                          selectionPred());

      // -----------------------------------------------------------------
      // Normalize each child; a child may be replaced by the node that
      // its own normalizeNode() returns.
      // -----------------------------------------------------------------
      for (Int32 childIx = 0; childIx < numChildren; ++childIx)
        child(childIx) = child(childIx)->normalizeNode(normWARef);

      // -----------------------------------------------------------------
      // Bottom-up step. The essential characteristic outputs of a child
      // can only be fully computed once those of the grandchildren are
      // known, because an output that is essential in a child stays
      // essential in its parent. pushdownCoveredExpr() computes outputs
      // on the way down and applies the same logic, but on this first
      // pass the grandchildren have no essential outputs yet, so the
      // fix-up is done explicitly here. Phases after the normalizer do
      // not need to repeat this call.
      // -----------------------------------------------------------------
      fixEssentialCharacteristicOutputs();
    }

  return this;
} // RelExpr::normalizeNode()
// -----------------------------------------------------------------------
// RelExpr::semanticQueryOptimizeNode()
// -----------------------------------------------------------------------
RelExpr * RelExpr::semanticQueryOptimizeNode(NormWA & normWARef)
{
  // ---------------------------------------------------------------------
  // Default implementation: nothing to do for this node itself. The
  // request is forwarded to every child, and each child slot is updated
  // with whatever node the child returns.
  // ---------------------------------------------------------------------
  const Int32 numChildren = getArity();
  Int32 childIx = 0;

  while (childIx < numChildren)
    {
      child(childIx) = child(childIx)->semanticQueryOptimizeNode(normWARef);
      ++childIx;
    }

  return this;
} // RelExpr::semanticQueryOptimizeNode()
// -----------------------------------------------------------------------
// RelExpr::getMoreOutputsIfPossible()
// This method is recursive. It is capable of making a tree walk down from
// the RelExpr pointed to by "this" and promoting the outputs
// of the children of each node so that the "this" node has all columns
// required to produce the valueids in the parameter outputsNeeded.
// If all members of outputsNeeded cannot be produced due to the presence
// of some operator that does not allow outputs from children to flow through
// (like groupby or sequence) then this method returns FALSE.
// Currently, this method is used towards the end of unnesting a tsj node.
// As part of the unnesting process, the left child of the join is
// required to produce additional columns which have been identified
// as a unique set for the left sub-tree. Sometimes it is possible that
// the children of the Join's left child are not producing one or more
// members of this unique set.
// -----------------------------------------------------------------------
NABoolean RelExpr::getMoreOutputsIfPossible(ValueIdSet& outputsNeeded)
{
  // Nothing was asked for, so the request is trivially satisfied.
  if (outputsNeeded.isEmpty())
    return TRUE;

  const Int32 numChildren = getArity();
  Int32 childIx;

  ValueIdSet oneChildPotential;    // potential outputs of a single child
  ValueIdSet allChildrenPotential; // union of the above over all children
  ValueIdSet stillMissing;         // needed values not covered by the children
  ValueIdSet noExtraInputs;        // deliberately left empty
  ValueIdSet coveredExprs;
  ValueIdSet coveredSubExprs;
  GroupAttributes scratchGA;       // stand-in attributes used for coverTest()
  NABoolean childSatisfiedRequest = FALSE;

  // ---------------------------------------------------------------------
  // Top-down part of the walk: check whether the children of this node
  // can already produce the required outputs. If they can, there is no
  // need to descend any further.
  // ---------------------------------------------------------------------
  for (childIx = 0; childIx < numChildren; childIx++)
    {
      child(childIx).getPtr()->getPotentialOutputValuesAsVEGs(oneChildPotential);
      allChildrenPotential += oneChildPotential;
      scratchGA.addCharacteristicInputs(
           child(childIx).getGroupAttr()->getCharacteristicInputs());
    }
  scratchGA.addCharacteristicOutputs(allChildrenPotential);
  scratchGA.coverTest(outputsNeeded,
                      noExtraInputs,   // additional inputs not provided
                      coveredExprs,
                      noExtraInputs,   // additional inputs not provided
                      &coveredSubExprs,
                      &stillMissing);

  if (NOT stillMissing.isEmpty())
    {
      // The children cannot produce everything yet: look further down
      // the tree, one child at a time, and stop descending as soon as
      // one child reports that it satisfied the request.
      for (childIx = 0;
           childIx < numChildren && NOT childSatisfiedRequest;
           childIx++)
        {
          childSatisfiedRequest =
            child(childIx).getPtr()->getMoreOutputsIfPossible(stillMissing);
        }
    }

  // ---------------------------------------------------------------------
  // Bottom-up part of the walk: the outputs of the children may have
  // grown, so find out again what they are capable of producing. If
  // they still cannot produce everything (even after a recursive call
  // returned TRUE), a child is an operator such as SEQUENCE that does
  // not let outputs flow through; FALSE is returned in that case.
  // ---------------------------------------------------------------------
  allChildrenPotential.clear();
  stillMissing.clear();
  for (childIx = 0; childIx < numChildren; childIx++)
    {
      child(childIx).getPtr()->getPotentialOutputValuesAsVEGs(oneChildPotential);
      allChildrenPotential += oneChildPotential;
    }
  scratchGA.addCharacteristicOutputs(allChildrenPotential);
  scratchGA.coverTest(outputsNeeded,
                      noExtraInputs,   // additional inputs not provided
                      coveredExprs,
                      noExtraInputs,   // additional inputs not provided
                      &coveredSubExprs,
                      &stillMissing);

  // Add to the outputs of each child those of its potential outputs
  // that are referenced by outputsNeeded. Note that this is done before
  // the result of the coverage test above is examined.
  ValueIdSet extraChildOutputs;
  ValueIdSet childMaxOutputs;
  for (childIx = 0; childIx < numChildren; childIx++)
    {
      extraChildOutputs.clear();
      childMaxOutputs.clear();
      child(childIx).getPtr()->getPotentialOutputValuesAsVEGs(childMaxOutputs);
      extraChildOutputs.accumulateReferencedValues(childMaxOutputs,
                                                   outputsNeeded);
      child(childIx)->getGroupAttr()->addCharacteristicOutputs(extraChildOutputs);

      // A MapValueIds node also records the added values in its map.
      if (getOperatorType() == REL_MAP_VALUEIDS)
        static_cast<MapValueIds *>(this)->addSameMapEntries(extraChildOutputs);

      // (A call to computeCharacteristicIO() on the child's group
      // attributes used to be made here; it is disabled.)
    }

  // Everything covered: report success with an emptied request.
  if (stillMissing.isEmpty())
    {
      outputsNeeded.clear();
      return TRUE;
    }

  // Otherwise hand the uncovered remainder back to the caller.
  outputsNeeded = stillMissing;
  return FALSE;
}
// RelExpr::getMoreOutputsIfPossible()
// ***********************************************************************
// $$$$ Join
// member functions for class Join
// ***********************************************************************
// -----------------------------------------------------------------------
// Join::transformNode()
// -----------------------------------------------------------------------
void Join::transformNode(NormWA & normWARef,
ExprGroupId & locationOfPointerToMe)
{
// ---------------------------------------------------------------------
// Transforms this Join and its two children. In order, this method:
//  1. rewrites a right join as a left join by swapping the children;
//  2. for an inner, non-semi join, merges joinPred_ into the selection
//     predicates;
//  3. passes this node's characteristic inputs to both children;
//  4. transforms the left child, the right child and then the join
//     predicate, switching VEGRegions for left, full outer and
//     anti-semi joins;
//  5. converts the join to a TSJ if the transformed join predicate
//     makes the right child need values produced by the left child;
//  6. pulls up the children's predicates;
//  7. optionally rewrites NOT IN predicates of an anti-semi join;
//  8. converts a TSJ back to a regular join when the right child does
//     not reference values of the left child;
//  9. transforms the selection predicates.
//
// normWARef             - normalizer work area (VEGRegions, flags).
// locationOfPointerToMe - the ExprGroupId that points to this node; it
//                         is asserted to refer to "this".
// ---------------------------------------------------------------------
CMPASSERT( this == locationOfPointerToMe );
// A node is transformed at most once.
if (nodeIsTransformed())
return;
markAsTransformed();
// ---------------------------------------------------------------------
// Rewrite a Right Join as a Left Join
// ---------------------------------------------------------------------
if (getOperatorType() == REL_RIGHT_JOIN)
{
setOperatorType(REL_LEFT_JOIN);
// switch the left and right subtrees
RelExpr * temp = child(0).getPtr();
child(0) = child(1).getPtr();
child(1) = temp;
}
if (isInnerNonSemiJoin())
{
// -----------------------------------------------------------------
// If not a SEMI or OUTER join then move the predicates in joinPred_
// to the selection predicates.
// -----------------------------------------------------------------
selectionPred() += joinPred_;
joinPred_.clear();
}
// Before triggers need special handling for subqueries: raise the flag
// in the work area while the children are being transformed. It is
// lowered again after the right child has been transformed.
if (child(0)->getOperatorType() == REL_BEFORE_TRIGGER)
{
normWARef.setInBeforeTrigger(TRUE);
}
// Make this node's characteristic inputs available to the children.
// The right child receives all of them.
ValueIdSet availableValues = getGroupAttr()->getCharacteristicInputs();
child(1)->getGroupAttr()->addCharacteristicInputs(availableValues);
if (isTSJForMergeUpsert())
{
// For a merge/upsert TSJ, row subqueries are not passed on to the
// left child: collect them and take them out of availableValues.
ValueIdSet subqVids;
for (ValueId vid = availableValues.init();
availableValues.next(vid); availableValues.advance(vid)) {
if (vid.getItemExpr()->getOperatorType() == ITM_ROW_SUBQUERY)
subqVids.insert(vid);
}
availableValues -= subqVids;
// the row subqueries have now been removed
}
child(0)->getGroupAttr()->addCharacteristicInputs(availableValues);
// ---------------------------------------------------------------------
// Allocate a new VEGRegion for the left subtree for
// full outer join.
// This is needed in order to convert the Full Outer Join to
// (a) Left Join - if there is a selection predicate on a column that
// suffers from null-instantiation and that column is
// part of the join column and that column is covered by
// left subtree.
// (b) Right Join - if there is a selection predicate on a column that
// suffers from null-instantiation and that column is
// part of the join column and that column is covered by
// right subtree.
// (c) Inner Join - if both (a) and (b) are true. That is, there is a
// predicate that satisfies (a) and there is a predicate that
// satisfies (b).
// ---------------------------------------------------------------------
if (isFullOuterJoin())
normWARef.allocateAndSetVEGRegion(IMPORT_AND_EXPORT,
this, // owner
0 // first child
);
// ---------------------------------------------------------------------
// Transform the left child.
// Put any semijoins between the child and myself
// ---------------------------------------------------------------------
child(0)->transformNode(normWARef, child(0));
// Return to my own VEGRegion.
if (isFullOuterJoin())
normWARef.restoreOriginalVEGRegion();
// ---------------------------------------------------------------------
// Initialize a new VEGRegion when entering the right subtree of a
// Left Join. The new VEGRegion should be capable of importing
// any outer references and exporting any value that suffers null
// instantiation.
// We don't really need the right subtree to be in a different VEGRegion
// from the left subtree since it cannot reference any values produced
// from there anyway. But the ON clause needs to be in a different
// VEGRegion, and since the ON clause may introduce semijoins in the
// right subtree it is more convenient to put both the subtree and the
// ON clause in the same VEGRegion.
// ---------------------------------------------------------------------
if (isLeftJoin() OR isFullOuterJoin() OR isAntiSemiJoin())
{
// Create a new VEGRegion for the right child for full outer Join.
if (isFullOuterJoin())
normWARef.allocateAndSetVEGRegion(IMPORT_AND_EXPORT,
this, // owner
1 //second child
);
else
normWARef.allocateAndSetVEGRegion(IMPORT_AND_EXPORT,
this); // default to first child.
}
// ---------------------------------------------------------------------
// Transform the right child.
// Put any semijoins between the child and myself
// ---------------------------------------------------------------------
child(1)->transformNode(normWARef, child(1));
// For a full outer join, leave the right child's region here; the join
// predicate gets a region of its own below. For left and anti-semi
// joins the region allocated above stays current until after the join
// predicate has been transformed.
if (isFullOuterJoin())
normWARef.restoreOriginalVEGRegion();
// done transforming before triggers subqueries
if (normWARef.isInBeforeTrigger())
{
normWARef.setInBeforeTrigger(FALSE);
}
// ---------------------------------------------------------------------
// If there is a joinPred transform them. Put any new Joins between this
// join and my current transformed child. The predicates may reference
// values from the left child.
// ---------------------------------------------------------------------
// Create a new VEGRegion for the Full Outer Region.
// The Join Predicate will reside in this VEGRegion.
// The selection predicate will remain in the parent's
// VEGRegion.
if (isFullOuterJoin())
normWARef.allocateAndSetVEGRegion(IMPORT_ONLY,
this, // owner
2 // third child
);
// TBD - Hema.
// Disallow subqueries in Join Predicate in Full Outer Join.
const NABoolean movePredicates = TRUE;
// The join predicate may use this node's inputs as well as anything
// the left child produces.
ValueIdSet externalInputs(getGroupAttr()->getCharacteristicInputs());
externalInputs += child(0)->getGroupAttr()->getCharacteristicOutputs();
if (joinPred().transformNode(normWARef,child(1),
externalInputs,movePredicates ))
{
// Transform the new right child
child(1)->transformNode(normWARef, child(1));
// -----------------------------------------------------------------
// The transformed subquery predicate required values that are
// produced by the semiJoin that is now my right child.
// The transformed predicates were moved there and the required
// inputs for my child are now correct (sufficient and minimal).
// -----------------------------------------------------------------
// Check to see if we need to turn this into a TSJ.
// neededInputs: what the right child requires beyond my own inputs.
ValueIdSet neededInputs;
neededInputs = child(1).getPtr()->getGroupAttr()->getCharacteristicInputs();
neededInputs -= getGroupAttr()->getCharacteristicInputs();
// crossReferences: what the left child produces.
ValueIdSet crossReferences;
crossReferences = child(0)->getGroupAttr()->getCharacteristicOutputs();
// --------------------------------------------------------------------
// At this point of transformation, the vid's in the different parts
// of the query tree might be inconsistent due to replacement expr
// being set. This will be corrected during normalization. Here, we
// need to explicitly compare the vid's of the replacement expr's.
// --------------------------------------------------------------------
ValueIdSet neededInputs2;
ValueId vid;
for (vid = neededInputs.init();
neededInputs.next(vid);
neededInputs.advance(vid))
neededInputs2.insert(vid.getItemExpr()->
getReplacementExpr()->getValueId());
ValueIdSet crossReferences2;
for (vid = crossReferences.init();
crossReferences.next(vid);
crossReferences.advance(vid))
crossReferences2.insert(vid.getItemExpr()->
getReplacementExpr()->getValueId());
// Keep only the left child outputs that the right child asks for.
crossReferences2.intersectSet(neededInputs2);
// If the right child needs values from the left child, turn this
// Join into a TSJ
if(NOT crossReferences2.isEmpty() && NOT isTSJ())
{
convertToTsj();
// After we transform the right child and we pullup predicates
// we may turn back to a non TSJ if we were able to pull-up
// all those predicates that needed values from the left child.
}
// Verify that we can produce every value the right child needs
neededInputs2 -= crossReferences2;
// neededInputs2 is now what the right child needs and is
// neither an input to this join nor an output of the left child
CMPASSERT(neededInputs2.isEmpty());
}
// ---------------------------------------------------------------------
// Restore the original VEGRegion.
// ---------------------------------------------------------------------
if (isLeftJoin() OR isFullOuterJoin() OR isAntiSemiJoin())
normWARef.restoreOriginalVEGRegion();
#if 0
// ---------------------------------------------------------------------
// Try to create "singleton" VEG with an null-inst value to emulate
// what's happening with base columns and index columns. This might not
// really be necessary and is therefore commented out for now pending
// a more detailed study.
// ---------------------------------------------------------------------
// Go through null-instantiated outputs and add columns to VEG.
if (isLeftJoin())
{
for (CollIndex x = 0; x < nullInstantiatedOutput().entries(); x++)
{
ValueId vid = nullInstantiatedOutput().at(x);
normWARef.addVEG(vid,vid);
}
}
#endif
// Debug trace of the selection predicates before the pull-up.
DBGSETDBG( "TRANSFORM_DEBUG" )
DBGIF(
unp = "";
selectionPred().unparse(unp);
cerr << "Join selpred: " << unp << endl;
)
// Pull up the predicates and recompute the required inputs
// of whoever my children are now.
pullUpPreds();
// Debug trace of the selection predicates after the pull-up.
DBGIF(
unp = "";
selectionPred().unparse(unp);
cerr << "Join selpred: " << unp << endl;
)
if (CmpCommon::getDefault(NOT_IN_OPTIMIZATION) == DF_ON)
{
if (isAntiSemiJoin())
{
// If there are NotIn(A,B) predicates, try transforming them to A=B
// if possible. The rewritten predicates are transformed in this
// join's VEGRegion and then replace the originals in joinPred().
ValueIdSet origSet;
ValueIdSet newSet;
rewriteNotInPredicate(origSet, newSet);
if (newSet.entries()>0)
{
normWARef.locateAndSetVEGRegion(this);
newSet.transformNode(normWARef,child(1),
externalInputs,movePredicates );
normWARef.restoreOriginalVEGRegion();
joinPred() -= origSet;
joinPred() += newSet;
}
}
}
// ---------------------------------------------------------------------
// Convert a tsj to a join when a value that is produced by the left
// subtree is not referenced in the right subtree.
// If the tsj right child contains triggers - don't convert to join.
// Triggers need to be activated for each left row even if they don't
// reference the left subtree data.
//
// For RoutineJoins/Udfs we also want to convert it to a join if the UDF
// does not need any inputs from the left and if the routine is
// deterministic.
// ---------------------------------------------------------------------
if (isTSJ())
{
/*--- old code (see comment below why it's replaced)
ValueIdSet outerRefs =
child(0)->getGroupAttr()->getCharacteristicOutputs();
outerRefs.intersectSet
(child(1)->getGroupAttr()->getCharacteristicInputs());
---*/
// Check whether this TSJ can be turned back into a regular join,
// i.e. whether the right child still needs values from the left.
ValueIdSet neededInputs;
neededInputs = child(1).getPtr()->getGroupAttr()->getCharacteristicInputs();
// is this ok? Our set of char. inputs may not yet be minimal,
// and could contain char. outputs from the left child.
neededInputs -= getGroupAttr()->getCharacteristicInputs();
ValueIdSet crossReferences;
crossReferences = child(0)->getGroupAttr()->getCharacteristicOutputs();
// --------------------------------------------------------------------
// At this point of transformation, the vid's in the different parts
// of the query tree might be inconsistent due to replacement expr
// being set. This will be corrected during normalization. Here, we
// need to explicitly compare the vid's of the replacement expr's.
// --------------------------------------------------------------------
ValueIdSet neededInputs2;
ValueId vid;
for (vid = neededInputs.init();
neededInputs.next(vid);
neededInputs.advance(vid))
neededInputs2.insert(vid.getItemExpr()->
getReplacementExpr()->getValueId());
ValueIdSet crossReferences2;
for (vid = crossReferences.init();
crossReferences.next(vid);
crossReferences.advance(vid))
crossReferences2.insert(vid.getItemExpr()->
getReplacementExpr()->getValueId());
crossReferences2.intersectSet(neededInputs2);
// The above logic looks at intersection of ValueIdSet of outputs
// of child(0) and inputs of child(1) to decide if a join is a TSJ.
// Sometimes, an expression (such as a base column of child(0))
// may have two distinct valueids depending on where it is appearing;
// i.e., it may have v1 as valueid on left side (child(0)) and
// v2 on right side
// (child(1)). In these cases, the simple intersection test is not
// complete. So, we look at physical index columns and names
// to deduce whether the two valueids are in fact the same or not.
if (crossReferences2.isEmpty())
{
// Equivalent index columns of the base columns produced by the left
// child.
ValueIdSet eis; // for storing equivalent index set
for (vid = crossReferences.init();
crossReferences.next(vid);
crossReferences.advance(vid))
{
if (vid.getItemExpr()->getOperatorType() != ITM_BASECOLUMN)
continue;
eis += ((BaseColumn *)vid.getItemExpr())->getEIC();
} // for populate eis set
// Equivalent index columns of the base columns needed by the right
// child.
ValueId vid1;
ValueIdSet eis1; // for storing equivalent index set
for (vid1 = neededInputs.init();
neededInputs.next(vid1);
neededInputs.advance(vid1))
{
if (vid1.getItemExpr()->getOperatorType() != ITM_BASECOLUMN)
continue;
eis1+= ((BaseColumn *)vid1.getItemExpr())->getEIC();
} // populate eis1 set
// now compare physical fileset name and column names, to see
// if any two columns from sets eis and eis1 are same
ValueIdSet rightChildInputs =
child(1).getPtr()->getGroupAttr()->getCharacteristicInputs();
for (vid = eis.init();
eis.next(vid);
eis.advance(vid))
{
if (vid.getItemExpr()->getOperatorType() != ITM_INDEXCOLUMN)
continue;
for (vid1 = eis1.init();
eis1.next(vid1);
eis1.advance(vid1))
{
if (vid1.getItemExpr()->getOperatorType() != ITM_INDEXCOLUMN)
continue;
if ( doTwoVidsReferToSameColumn(vid, vid1) )
{
// Both index columns denote the same column: put their
// definitions into one VEG.
normWARef.addVEG(
((IndexColumn *)vid.getItemExpr())->getDefinition(),
((IndexColumn *)vid1.getItemExpr())->getDefinition()
);
//-------------------------------------------------------
// Genesis Case: 10-000626-1151:
// This is a TSJ: if right child is asking for a valueid
// that is in the same VEG as what the left child is
// producing, then the right child may ask for what the
// left child is producing
//--------------------------------------------------------
rightChildInputs -=
((IndexColumn *)vid1.getItemExpr())->getDefinition();
rightChildInputs +=
((IndexColumn *)vid.getItemExpr())->getDefinition();
// Record the cross reference so the TSJ is kept below.
crossReferences2.insert(vid);
}
} // inner for
} // outer for
child(1).getPtr()->getGroupAttr()->
setCharacteristicInputs(rightChildInputs);
} // isEmpty()
// Convert to a regular join only if the right child references
// nothing from the left child, this is not a TSJ for write, it drives
// neither pipelined actions nor a temp insert, and it is not a routine
// join whose right child has non-deterministic UDRs.
if (crossReferences2.isEmpty() &&
!isTSJForWrite() &&
!getInliningInfo().isDrivingPipelinedActions() &&
!getInliningInfo().isDrivingTempInsert() && // Triggers -
!(isRoutineJoin() &&
child(1).getGroupAttr()->getHasNonDeterministicUDRs()))
{
// Remember we used to be a RoutineJoin. This is used to determine
// what type of contexts for partitioning we will try in OptPhysRel.
if (isRoutineJoin())
setDerivedFromRoutineJoin();
convertToNotTsj();
}
else
{
// We have a TSJ that will be changed to a Nested join, so it is
// safe to change NotIn here to non equi-predicate form (NE).
// At this point only the case of a single column NotIn can reach
// here, where either the outer or inner column or both is nullable
// and may have null values
resolveSingleColNotInPredicate();
}
}
// Finally, transform the selection predicates of this join.
transformSelectPred(normWARef, locationOfPointerToMe);
} // Join::transformNode()
// -----------------------------------------------------------------------
// Join::pullUpPreds()
// -----------------------------------------------------------------------
void Join::pullUpPreds()
{
// We don't pull up predicateds for Full Outer Join (FOJ).
// That is because we don't try to push them down during
// normalization.
// Just recomputeOuterReferences() on both children.
if (getOperatorType() == REL_FULL_JOIN)
{
child(0)->recomputeOuterReferences();
child(1)->recomputeOuterReferences();
return;
}
// ---------------------------------------------------------------------
// Pull up predicates from each child.
// Accumulate the predicates from each of my child subtrees.
// ---------------------------------------------------------------------
// Pull up the predicates from the left child
// ---------------------------------------------------------------------
selectionPred() += child(0)->getSelectionPred();
child(0)->selectionPred().clear();
child(0)->recomputeOuterReferences();
// ---------------------------------------------------------------------
// If outer/semi join then predicates from the right child go to
// joinPred otherwise they go to the selectionPred.
// ---------------------------------------------------------------------
if (isInnerNonSemiJoin() || getOperatorType() == REL_TSJ_FLOW)
{
selectionPred() += child(1)->getSelectionPred();
}
else
{
joinPred() += child(1)->getSelectionPred();
}
child(1)->selectionPred().clear();
child(1)->recomputeOuterReferences();
//----------------------------------------------------------------------
// if am a SemiJoin and any of my joinPred is covered by my inputs
// and my first child output then move that to the selectionPed.
//----------------------------------------------------------------------
ValueIdSet predicatesToMove, boringSet, predicatesThatStay;
if (isSemiJoin()) // anti-joins, left-joins shouldn't do this!
{
getGroupAttr()->coverTest(joinPred(),
child(0)->getGroupAttr()->getCharacteristicOutputs(),
predicatesToMove,
boringSet,
&predicatesThatStay);
if (NOT predicatesToMove.isEmpty())
{
joinPred() -= predicatesToMove;
selectionPred() += predicatesToMove;
}
}
} // Join::pullUpPreds()
// -----------------------------------------------------------------------
// Join::recomputeOuterReferences()
// -----------------------------------------------------------------------
void Join::recomputeOuterReferences()
{
// ---------------------------------------------------------------------
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
// ---------------------------------------------------------------------
if (NOT getGroupAttr()->getCharacteristicInputs().isEmpty())
{
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
// Weed out those expressions not needed by my selectionPred and joinPred
ValueIdSet exprSet = getSelectionPred();
exprSet += joinPred();
exprSet.insertList(nullInstantiatedOutput());
exprSet.insertList(nullInstantiatedForRightJoinOutput());
exprSet.weedOutUnreferenced(outerRefs);
// Add back those expressiones needed by my left child
outerRefs += child(0).getPtr()->getGroupAttr()->getCharacteristicInputs();
// If it is a TSJ don't add the outputs of the left child to
// the needed inputs.
exprSet = child(1).getPtr()->getGroupAttr()->getCharacteristicInputs();
if (isTSJForMergeUpsert())
{
ValueIdSet exprSet2;
ValueId vid;
for (vid = exprSet.init();
exprSet.next(vid);
exprSet.advance(vid))
exprSet2.insert(vid.getItemExpr()->
getReplacementExpr()->getValueId());
exprSet2.removeCoveredExprs(child(0).getPtr()->getGroupAttr()->getCharacteristicOutputs());
outerRefs += exprSet2;
}
else
{
if (isTSJ())
{
exprSet.removeCoveredExprs(child(0).getPtr()->getGroupAttr()->getCharacteristicOutputs());
}
outerRefs += exprSet;
}
getGroupAttr()->setCharacteristicInputs(outerRefs);
}
} // Join::recomputeOuterReferences()
// ----------------------------------------------------------------------
// Fix genesis case 10-061010-8731, solution 10-061010-9689 RFE in which
// queries like
// select ... from t1
// where t1.c not in (select t2.c from t2 where t2.c is not null ...)
// and t1.c is not null ...
// is compiled into a horribly inefficient but correct plan like
// nested_anti_semi_join(pa(t1), pa(t2))
// which generates a cross product of t1 with t2 and applies the predicate
// not((t1.c <> t2.c) is true)
// A joke is that the Bank of America query reported in the case would have
// taken 5 years to run. A much better plan would be
// hash_anti_semi_join(pa(t1), pa(t2))
// which generates a join of t1 with t2 and applies the predicate
// t1.c = t2.c
// Using this plan, the Bank of America query completes in under 2 minutes.
// ----------------------------------------------------------------------
void Join::tryToRewriteJoinPredicate(NormWA & normWARef)
{
  // The rewrite is only attempted for anti_semi_joins.
  if (NOT isAntiSemiJoin())
    return;

  // Scan the join predicates for the shape "not((t1.c <> t2.c) is true)".
  for (ValueId predId = joinPred().init();
       joinPred().next(predId);
       joinPred().advance(predId))
    {
      ItemExpr *notExpr = predId.getItemExpr();
      if (notExpr->getOperatorType() != ITM_NOT)
        continue;

      ItemExpr *isTrueExpr = notExpr->child(0);
      if (isTrueExpr == NULL ||
          isTrueExpr->getOperatorType() != ITM_IS_TRUE)
        continue;

      ItemExpr *notEqualExpr = isTrueExpr->child(0);
      if (notEqualExpr == NULL ||
          notEqualExpr->getOperatorType() != ITM_NOT_EQUAL)
        continue;

      // Look for conditions that can guarantee that both operands are
      // not nullable, e.g. "t1.c is not null && t2.c is not null".
      // The candidates are the other join predicates plus the selection
      // predicates.
      ValueIdSet otherPreds = joinPred();
      otherPreds -= predId;           // all join predicates but this one
      otherPreds += selectionPred();  // plus any selection predicate

      if (NOT otherPreds.isNotNullable(notEqualExpr->child(0)) ||
          NOT otherPreds.isNotNullable(notEqualExpr->child(1)))
        continue;

#ifndef NDEBUG
      // Optional debug log of the predicate before the rewrite.
      FILE *logF = NULL;
      NABoolean logRewrites =
        CmpCommon::getDefault(COMP_BOOL_138) == DF_OFF &&
        CmpCommon::getDefault(COMP_BOOL_137) == DF_ON;
      if (logRewrites &&
          (logF = fopen("rewriteJoinPredicateLog.txt", "a")) != NULL)
        {
          otherPreds.print(logF, "", "ASJ predicates:");
          notExpr->print(logF);
        }
#endif

      // Both operands are guaranteed to be non-null. Replace
      // "not((t1.c<>t2.c) is true) && t1.c is not null && t2.c is not null"
      // with "t1.c=t2.c".
      ItemExpr *equalPred = new(normWARef.wHeap())
        BiRelat(ITM_EQUAL,
                notEqualExpr->child(0).getPtr(),
                notEqualExpr->child(1).getPtr());
      static_cast<BiRelat *>(equalPred)->
        specialMultiValuePredicateTransformation() = TRUE;
      predId.replaceItemExpr(equalPred);
      equalPred->synthTypeAndValueId(TRUE);

#ifndef NDEBUG
      // Log the predicate after the rewrite and close the log.
      if (logRewrites && logF)
        {
          predId.getItemExpr()->print(logF);
          fclose(logF);
        }
#endif
    }
}
// -----------------------------------------------------------------------
// Join::rewriteNode()
//
// Rewrites this join and both of its subtrees. In order:
//   1. converts a LEFT JOIN to a non-outer join when
//      canConvertLeftJoinToInnerJoin() permits it, and lets
//      tryToConvertFullOuterJoin() attempt to convert a FULL OUTER JOIN;
//   2. optionally rewrites a "not((t1.c <> t2.c) is true)" join predicate;
//   3. rewrites the left child, the right child, the ON clause and the
//      WHERE clause, switching VEGRegions where the join type needs it;
//   4. rewrites updateSelectValueIdMap_ (MV log insert case only) and the
//      characteristic inputs and outputs of the Group Attributes.
// -----------------------------------------------------------------------
void Join::rewriteNode(NormWA & normWARef)
{
  // Capture the join flavour once. The flags are updated below whenever a
  // conversion changes what kind of join this node is.
  NABoolean isALeftJoin      = isLeftJoin();
  NABoolean isAFullOuterJoin = isFullOuterJoin();
  NABoolean isASemiJoin      = isSemiJoin();

  // ---------------------------------------------------------------------
  // A (non-full) LEFT JOIN that can be converted stops being an outer join.
  // ---------------------------------------------------------------------
  if (isALeftJoin && !isAFullOuterJoin &&
      canConvertLeftJoinToInnerJoin(normWARef))
    {
      convertToNotOuterJoin();
      isALeftJoin = FALSE;   // no longer a LEFT JOIN

      // Combine all the predicates together.
      if (!isASemiJoin && !isAntiSemiJoin())
        {
          selectionPred() += joinPred();
          joinPred().clear();
        }
      else
        {
          CMPASSERT (FALSE) ; // left joins can't be semi!
          joinPred() += getSelectionPred();
          selectionPred().clear();
        }
    }

  // ---------------------------------------------------------------------
  // Give a FULL OUTER JOIN the chance to be converted. A changed operator
  // type means the conversion took place.
  // ---------------------------------------------------------------------
  if (isAFullOuterJoin)
    {
      tryToConvertFullOuterJoin(this, normWARef);
      if (getOperatorType() != REL_FULL_JOIN)
        isAFullOuterJoin = FALSE;   // no longer a FULL OUTER
    }

  // ---------------------------------------------------------------------
  // Try to rewrite the join predicate from
  //   not((t1.c <> t2.c) is true) and t1.c is not null and t2.c is not null
  // to
  //   t1.c = t2.c and t1.c is not null and t2.c is not null
  // When NOT_IN_OPTIMIZATION is ON this call is not needed anymore; the
  // call and the method may be removed in the future.
  // ---------------------------------------------------------------------
  if (CmpCommon::getDefault(COMP_BOOL_138) == DF_OFF &&
      CmpCommon::getDefault(NOT_IN_OPTIMIZATION) == DF_OFF)
    {
      tryToRewriteJoinPredicate(normWARef);
    }

  // ---------------------------------------------------------------------
  // Left child. For a full outer join it is rewritten inside the VEGRegion
  // of the first child, and the values subject to null-instantiation are
  // normalized with values of that region before the region is restored.
  // ---------------------------------------------------------------------
  if (isAFullOuterJoin)
    normWARef.locateAndSetVEGRegion(this, 0 /* first child */);

  child(0)->rewriteNode(normWARef);
  normalizeNullInstantiatedForRightJoinOutput(normWARef);

  if (isAFullOuterJoin)
    normWARef.restoreOriginalVEGRegion();

  // ---------------------------------------------------------------------
  // Right child and ON clause.
  // ---------------------------------------------------------------------
  if (isALeftJoin OR isAFullOuterJoin OR isAntiSemiJoin())
    {
      // Locate and set the VEGRegion for the right child, so that "="
      // predicates are rewritten in terms of the VEGs valid in that region.
      if (isAFullOuterJoin)
        normWARef.locateAndSetVEGRegion(this, 1 /* second child */);
      else
        normWARef.locateAndSetVEGRegion(this);

      child(1)->rewriteNode(normWARef);

      // Normalize the values that will be subject to null-instantiation
      // with values in the Child(1) region.
      normalizeNullInstantiatedOutput(normWARef);

      // A full outer join leaves the second child's region here and
      // enters a separate region for the ON clause. For the other join
      // types the region set above stays active for the ON clause.
      if (isAFullOuterJoin)
        {
          normWARef.restoreOriginalVEGRegion();
          normWARef.locateAndSetVEGRegion(this, 2 /* third child */);
        }

      // Rewrite expressions in the ON clause predicate.
      normWARef.setInJoinPredicate(TRUE) ;
      joinPred().normalizeNode(normWARef);
      normWARef.setInJoinPredicate(FALSE) ;

      // Restore the original VEGRegion.
      normWARef.restoreOriginalVEGRegion();
    }
  else
    {
      // INNER join: no region switching for the right child or ON clause.
      child(1)->rewriteNode(normWARef);
      joinPred().normalizeNode(normWARef);
    }

  // ---------------------------------------------------------------------
  // Rewrite expressions in the WHERE clause predicate tree.
  // ---------------------------------------------------------------------
  selectionPred().normalizeNode(normWARef);

  // ---------------------------------------------------------------------
  // Rewrite the ValueIdMap between the select and the update part so
  // it has VEGReferences in it (note that we avoided VEGs that span
  // both the select and the update part; this is (probably?) one
  // reason why we only normalized one half of the keys preds above).
  // ---------------------------------------------------------------------
  if (getInliningInfo().isDrivingMvLogInsert() &&
      NULL != updateSelectValueIdMap_)
    {
      updateSelectValueIdMap_->normalizeNode(normWARef);

      // If a VID in the bottom is of the form ValueIdUnion(x,x),
      // then replace it with x. This is necessary to push down UPDATEs
      // with MV attached tables into DP2.
      const ValueIdList& originalBottomValues =
        updateSelectValueIdMap_->getBottomValues();
      ValueIdList newBottomValues(originalBottomValues);

      for (CollIndex pos = 0; pos < originalBottomValues.entries(); pos++)
        {
          ItemExpr *bottomExpr = originalBottomValues[pos].getItemExpr();

          ValueIdUnion *vidUnion = NULL;
          if (bottomExpr &&
              bottomExpr->getOperatorType() == ITM_VALUEIDUNION)
            vidUnion = (ValueIdUnion *) bottomExpr;

          if (vidUnion &&
              vidUnion->getLeftSource() == vidUnion->getRightSource())
            newBottomValues[pos] = vidUnion->getRightSource();
          else
            newBottomValues[pos] = originalBottomValues[pos];
        }

      updateSelectValueIdMap_ = new (CmpCommon::statementHeap())
        ValueIdMap(updateSelectValueIdMap_->getTopValues(), newBottomValues);
    }

  // ---------------------------------------------------------------------
  // Rewrite expressions in the Group Attributes. For a left join the
  // characteristic inputs are normalized while the VEGRegion of child 0
  // is saved in the work area; the outputs are normalized after the reset.
  // ---------------------------------------------------------------------
  if (isALeftJoin)
    normWARef.saveLeftJoinChildVEGRegion(this,0);

  ((ValueIdSet &)getGroupAttr()->getCharacteristicInputs()).normalizeNode(normWARef);

  if (isALeftJoin)
    normWARef.resetLeftJoinChildVEGRegion();

  ((ValueIdSet &)getGroupAttr()->getCharacteristicOutputs()).normalizeNode(normWARef);
  // getGroupAttr()->normalizeInputsAndOutputs(normWARef);
} // Join::rewriteNode()
// -----------------------------------------------------------------------
// Join::canConvertLeftJoinToInnerJoin()
//
// Currently handles LEFT JOIN only. The decision is delegated entirely to
// NormWA::locateVEGRegionAndCheckIfMerged() for this join; its result is
// returned unchanged.
// -----------------------------------------------------------------------
NABoolean Join::canConvertLeftJoinToInnerJoin(NormWA & normWARef)
{
  const NABoolean regionWasMerged =
    normWARef.locateVEGRegionAndCheckIfMerged(this);
  return regionWasMerged;
} // Join::canConvertLeftJoinToInnerJoin()
// -----------------------------------------------------------------------
// Join::normalizeNullInstantiatedOutput()
//
// Normalizes the operand of every InstantiateNull operator listed in
// nullInstantiatedOutput(). Only the operand (child 0) is replaced by its
// normalized form; a dedicated method is needed so that an InstantiateNull
// appearing in this list is not itself replaced with a VEGReference for
// the VEG to which it belongs.
// -----------------------------------------------------------------------
void Join::normalizeNullInstantiatedOutput(NormWA & normWARef)
{
  CollIndex pos = 0;
  while (pos < nullInstantiatedOutput().entries())
    {
      ItemExpr * nullInst = nullInstantiatedOutput()[pos].getItemExpr();
      CMPASSERT(nullInst->getOperatorType() == ITM_INSTANTIATE_NULL);

      // Swap the operand for its normalized form, keeping the
      // InstantiateNull node itself in place.
      nullInst->child(0) = nullInst->child(0)->normalizeNode(normWARef);

      pos++;
    }

  //++MV
  // Build the map used for translating the required sort key to the
  // right child sort key and backwards.
  BuildRightChildMapForLeftJoin();
  //--MV
} // Join::normalizeNullInstantiatedOutput()
// -----------------------------------------------------------------------
// Join::normalizeNullInstantiatedForRightJoinOutput()
//
// Normalizes the operand of every InstantiateNull operator listed in
// nullInstantiatedForRightJoinOutput(). Only the operand (child 0) is
// replaced by its normalized form; a dedicated method is needed so that an
// InstantiateNull appearing in this list is not itself replaced with a
// VEGReference for the VEG to which it belongs.
// -----------------------------------------------------------------------
void Join::normalizeNullInstantiatedForRightJoinOutput(NormWA & normWARef)
{
  CollIndex pos = 0;
  while (pos < nullInstantiatedForRightJoinOutput().entries())
    {
      ItemExpr * nullInst =
        nullInstantiatedForRightJoinOutput()[pos].getItemExpr();
      CMPASSERT(nullInst->getOperatorType() == ITM_INSTANTIATE_NULL);

      // Swap the operand for its normalized form, keeping the
      // InstantiateNull node itself in place.
      nullInst->child(0) = nullInst->child(0)->normalizeNode(normWARef);

      pos++;
    }

  //++MV
  // Build the child map for the right-join direction.
  // NOTE(review): presumably the left-child counterpart of the sort-key
  // translation map built by BuildRightChildMapForLeftJoin() -- confirm.
  BuildLeftChildMapForRightJoin();
  //--MV
} // Join::normalizeNullInstantiatedForRightJoinOutput()
/* -----------------------------------------------------------------------
   Join::leftLinearizeJoinTree()

   A left-linear tree of Inner Joins is one in which no Inner Join
   has another Inner Join as its right child. This method implements
   a transformation rule that produces a left-linear tree of joins.
   It replaces, if possible, T1 IJ (T2 IJ T3) with a left-linear sequence
   T1 IJ T2 IJ T3.

   The rule applies when this node is an Inner Join (REL_JOIN) and its
   right child is a REL_JOIN, a REL_ROUTINE_JOIN or a REL_LEFT_JOIN.

   The figure below assumes that subtree L2 and subtree R2 do not have
   an Inner Join as the topmost node.
   Pattern before the transformation:

                    Inner Join #1 : p1
                     /            \
         subtree L1 : p2      Inner Join #2 : p5
                               /            \
                   subtree L2 : p3      subtree R2 : p4

   NOTE: p1,p2,p3,p4,p5 are predicates

   Left linear tree produced by this transformation:

                    Inner Join #2 : p1 & p5  (we attempt to push down these
                     /            \           predicates, so they may end up
            Inner Join #1     subtree R2 : p4 in the children)
             /          \
    subtree L1 : p2   subtree L2 : p3

   Parameters:
     normWARef          - normalizer work area; flagged when a recursive
                          pushdown will be needed at the RelRoot.
     transformationType - caller context. UNNESTING and
                          SEMI_JOIN_TO_INNER_JOIN use their own pushdown
                          call and resynthesize logical properties; any
                          other value only pushes the pulled-up predicates
                          back down.

   Returns: this node when the rule does not apply (or the user fixed the
   join order), otherwise the root of the left-linearized tree.
   ----------------------------------------------------------------------- */
Join * Join::leftLinearizeJoinTree(NormWA & normWARef,
                                   TransformationType transformationType)
{
  // Don't do this transformation if the user said they want the
  // join order to be completely determined by the order the
  // tables are specified in the query.
  if (CURRSTMT_OPTDEFAULTS->joinOrderByUser())
    return this;

  // Condition for applying this rule:
  // I must be an Inner Join and my right child must be an Inner Join,
  // a Routine Join or a Left Join.
  if ( (getOperatorType() != REL_JOIN) OR
       ( (child(1)->getOperatorType() != REL_JOIN) AND
         (child(1)->getOperatorType() != REL_ROUTINE_JOIN) AND
         (child(1)->getOperatorType() != REL_LEFT_JOIN) ) )
    return this;

  // R1 is my current right child
  Join * R1 = (Join *)(child(1).getPtr());

  // Left linearize R1. The call returns the root of the linearized right
  // subtree, which differs from the node it was invoked on whenever a
  // rotation took place there. Continue with that root; rotating around
  // the old node instead would leave the new root (and its right subtree)
  // out of the tree returned below.
  R1 = R1->leftLinearizeJoinTree(normWARef, transformationType);

  // Assign the left child of R1 to become my new right child
  child(1) = R1->child(0);

  // If we pulled anything up or R1 has a join predicate, we need to
  // run recursive pushdown at the RelRoot to make sure we don't end up
  // with predicates on unions and TSJs. This will happen at the end
  // of the SQO phase so we don't do any unnecessary tree walks.
  if ((!selectionPred().isEmpty() || !R1->joinPred().isEmpty() ) &&
      (R1->child(0)->getOperatorType() == REL_UNION ||
       R1->child(1)->getOperatorType() == REL_UNION))
    normWARef.setRequiresRecursivePushdown(TRUE);

  // Pull up predicates so that VEGPredicates and predicates that contain
  // VEGReferences can potentially be distributed more extensively amongst
  // my subtrees.
  R1->selectionPred() += getSelectionPred();
  selectionPred().clear();

  // R1 inherits all the values that I received as inputs and should
  // produce all the values that I was producing as output.
  R1->getGroupAttr()->setCharacteristicInputs
    (getGroupAttr()->getCharacteristicInputs());
  R1->getGroupAttr()->setCharacteristicOutputs
    (getGroupAttr()->getCharacteristicOutputs());

  // Recompute my own Inputs and Outputs.
  primeGroupAttributes();

  // Temporarily set the left child after the rotation so that I could push
  // predicate down to it before starting left linearization again (due to
  // the new right child).
  R1->child(0) = this;

  ValueIdSet availableInputs;
  availableInputs = R1->getGroupAttr()->getCharacteristicInputs();

  // If this method is being called during subquery unnesting
  // then logical properties need to be resynthesized and
  // pushdown rules are slightly different
  if (transformationType == UNNESTING)
    {
      // Outer references are not offered as available inputs for pushdown.
      ValueIdSet outerReferences ;
      availableInputs.getOuterReferences(outerReferences);
      availableInputs -= outerReferences ;

      // For a routine join, the inputs of its right child are passed to
      // the pushdown as non-predicate expressions.
      ValueIdSet nonPredExpr;
      if (R1->getOperatorType() == REL_ROUTINE_JOIN)
        nonPredExpr += R1->child(1)->getGroupAttr()->getCharacteristicInputs() ;

      R1->pushdownCoveredExprSQO(R1->getGroupAttr()->getCharacteristicOutputs(),
                                 availableInputs,
                                 R1->selectionPred(),
                                 nonPredExpr,
                                 TRUE,  // keepPredsNotCoveredByLeftChild
                                 TRUE); // keepPredsNotCoveredByRightChild

      R1->getGroupAttr()->clearLogProperties();
      getGroupAttr()->clearLogProperties();
      R1->synthLogProp();
    }
  else if (transformationType == SEMI_JOIN_TO_INNER_JOIN)
    {
      R1->pushdownCoveredExpr(R1->getGroupAttr()->getCharacteristicOutputs(),
                              availableInputs,
                              R1->selectionPred());

      R1->getGroupAttr()->clearLogProperties();
      getGroupAttr()->clearLogProperties();
      R1->synthLogProp();
    }
  else
    {
      // Pushdown predicates that were pulled up
      R1->pushdownCoveredExpr(R1->getGroupAttr()->getCharacteristicOutputs(),
                              availableInputs,
                              R1->selectionPred());
    }

  // I must left-linearize myself once again because I have acquired a new
  // right child.
  R1->child(0) = leftLinearizeJoinTree(normWARef, transformationType);

  return R1; // the tree was indeed left linearized
} // Join::leftLinearizeJoinTree()
// -----------------------------------------------------------------------
// Join::normalizeNode()
//
// Normalizes this join once (subsequent calls return immediately):
// pushes predicates down, normalizes both children in the VEGRegion
// appropriate for the join type, left-linearizes the join tree and, when
// possible, converts a TSJ into a regular join.
//
// Returns the root of the normalized expression, which is the result of
// leftLinearizeJoinTree() and therefore not necessarily this node.
// -----------------------------------------------------------------------
RelExpr * Join::normalizeNode(NormWA & normWARef)
{
  if (nodeIsNormalized())
    return this;

  markAsNormalized();

  // Remember whether we started out as a TSJ; consulted further below when
  // deciding on the TSJ-to-join conversion.
  NABoolean isATSJ = isTSJ();

  //--------------------------------------------------------------------------------
  // Create filternode on top of grandchild of a subquery TSJ to prevent pushdown
  // of predicates. This is needed if the correlated subquery will be unnested.
  //--------------------------------------------------------------------------------
  if (candidateForSubqueryUnnest() &&
      (child(1)->getOperatorType() == REL_GROUPBY))
    createAFilterGrandChildIfNeeded(normWARef);

  // -----------------------------------------------------------------
  // Perform predicate pushdown.
  // -----------------------------------------------------------------
  pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
                      getGroupAttr()->getCharacteristicInputs(),
                      selectionPred());

  // rewrite notin predicate
  if (CmpCommon::getDefault(NOT_IN_OUTER_OPTIMIZATION) == DF_ON)
    rewriteNotInPredicate();

  // -----------------------------------------------------------------
  // Normalize the left subtree and store the pointer to its root after
  // normalization. A full outer join does this inside the VEGRegion of
  // its first child.
  // -----------------------------------------------------------------
  if (isFullOuterJoin())
    normWARef.locateAndSetVEGRegion(this, 0 /* first child */);

  child(0) = child(0)->normalizeNode(normWARef);

  if (isFullOuterJoin())
    normWARef.restoreOriginalVEGRegion();

  // -----------------------------------------------------------------
  // Normalize the right subtree in the proper VEGRegion. Left, anti-semi
  // and full outer joins locate and set a region for the right subtree;
  // all other joins normalize it in the current region.
  // -----------------------------------------------------------------
  const NABoolean rightChildHasOwnRegion =
    isLeftJoin() OR isAntiSemiJoin() OR isFullOuterJoin();

  if (rightChildHasOwnRegion)
    {
      if (isFullOuterJoin())
        normWARef.locateAndSetVEGRegion(this, 1 /* second child */);
      else
        normWARef.locateAndSetVEGRegion(this);
    }

  child(1) = child(1)->normalizeNode(normWARef);

  if (rightChildHasOwnRegion)
    normWARef.restoreOriginalVEGRegion();

  fixEssentialCharacteristicOutputs();

  // -----------------------------------------------------------------
  // Transform a bushy tree of inner joins or a subtree in which
  // a left join is the right child of an inner join into a
  // left associative linear sequence of join. Note that TSJs are
  // not transformed.
  // -----------------------------------------------------------------
  RelExpr * normalizedExpr = leftLinearizeJoinTree(normWARef);

  // ---------------------------------------------------------------------
  // Convert a tsj to a join if the tsj is not for a write operation
  // and if a value that is produced by the left subtree is not
  // referenced in the right subtree. The conversion is also skipped when
  // this node drives pipelined actions or a temp insert (triggers), and
  // for a routine join whose right child has non-deterministic UDRs.
  // ---------------------------------------------------------------------
  const NABoolean convertTsjToJoin =
    isATSJ && !isTSJForWrite() &&
    !child(1)->getGroupAttr()->
      getCharacteristicInputs().referencesOneValueFromTheSet
        (child(0)->getGroupAttr()->getCharacteristicOutputs()) &&
    !getInliningInfo().isDrivingPipelinedActions() &&
    !getInliningInfo().isDrivingTempInsert() &&
    !(isRoutineJoin() &&
      child(1).getGroupAttr()->getHasNonDeterministicUDRs());

  if (convertTsjToJoin)
    {
      // Remember we used to be a RoutineJoin. This is used to determine
      // what type of contexts for partitioning we will try in OptPhysRel.
      if (isRoutineJoin())
        setDerivedFromRoutineJoin();

      convertToNotTsj();

      // Now that this is a regular join, linearize the tree again.
      normalizedExpr = leftLinearizeJoinTree(normWARef);
    }

  normWARef.setExtraHubVertex(normalizedExpr);

  return normalizedExpr;
} // Join::normalizeNode()
//--------------------------------------------------------------------------
// Join::createAFilterGrandChildIfNeeded()
//
// This filter node is created (if necessary) after transform but before
// normalization. Therefore inputs are minimal but outputs are maximal. Any
// predicates with outerreferences will be as high up in the tree as possible.
//
// The caller guarantees that child(1) is a GroupBy. The method inspects the
// GroupBy's child (looking through one additional nested GroupBy) and
// either
//   - inserts a Filter node holding the predicates that reference outer
//     references, directly above that child, or
//   - leaves the tree alone because the GroupBy itself carries the
//     correlation, or
//   - decides that the subquery must not be unnested, in which case the
//     unnest-candidate flag of this join is cleared and the correlated
//     subquery count in normWARef is decremented.
// When SUBQUERY_UNNESTING is DF_DEBUG, a 2997 diagnostic states the reason
// for every decision not to unnest.
//---------------------------------------------------------------------------
void Join::createAFilterGrandChildIfNeeded(NormWA & normWARef)
{
  // caller has already verified that child(1) is a groupby
  CMPASSERT(child(1)->getOperatorType() == REL_GROUPBY) ;

  Filter *predFilterNode = NULL;
  NABoolean doNotUnnest = FALSE ;
  GroupByAgg * gbyNode = (GroupByAgg *) child(1)->castToRelExpr();
  RelExpr * oldRightGrandChild = child(1)->child(0)->castToRelExpr();
  NABoolean candidateForLeftJoin = candidateForSubqueryLeftJoinConversion();
  NABoolean nestedAggInSubQ = FALSE;
  GroupByAgg * subQGby = NULL ;

  // Look through one nested GroupBy: remember it and continue with the
  // node below it.
  if (oldRightGrandChild->getOperator().match(REL_GROUPBY))
    {
      subQGby = (GroupByAgg *) oldRightGrandChild ;
      oldRightGrandChild = oldRightGrandChild->child(0)->castToRelExpr();
      nestedAggInSubQ = TRUE ;
    }

  if (oldRightGrandChild->getOperator().match(REL_ANY_SEMIJOIN) ||
      oldRightGrandChild->getOperator().match(REL_ANY_ANTI_SEMIJOIN) ||
      oldRightGrandChild->getOperator().match(REL_GROUPBY))
    {
      // we do not want to unnest queries that have a semijoin or a group by
      // as a child of the groupby.
      doNotUnnest = TRUE;
      if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
        {
          *CmpCommon::diags() << DgSqlCode(2997)
            << DgString1("Subquery was not unnested. Reason: Right grandchild of TSJ is a semijoin or has more than one group by");
        }
    }

  // -----------------------------------------------------------------------
  // Check to see if we have any Outer References in our selection predicate
  // If we do we want to create a Filter Node on top of ourselves to hold
  // the Outer Reference predicate.
  // ------------------------------------------------------------------------
  ValueIdSet outerReferences, nonLocalPreds;
  gbyNode->getGroupAttr()->getCharacteristicInputs().
    getOuterReferences(outerReferences);

  // We found that for left joins, we don't want to pull up correlated
  // predicates from the selection predicate if there are also correlated
  // predicates in the join predicate. This is a fix for solution
  // 10-090206-8977.
  if ( (doNotUnnest == FALSE) &&
       oldRightGrandChild->getOperator().match(REL_ANY_LEFT_JOIN))
    {
      Join *leftJoinGrandChild = (Join *) oldRightGrandChild;
      if (leftJoinGrandChild->joinPred().
            getReferencedPredicates(outerReferences, nonLocalPreds))
        {
          doNotUnnest = TRUE;
          if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
            {
              *CmpCommon::diags() << DgSqlCode(2997)
                << DgString1("Subquery was not unnested. Reason: Filter child is leftJoin with outerreferences in joinPred ");
            }
        }
    }

  if (doNotUnnest == FALSE)
    {
      // Collect the correlated predicates of the grandchild (and of the
      // nested GroupBy, if there is one).
      nonLocalPreds.clear();
      oldRightGrandChild->selectionPred().getReferencedPredicates
        (outerReferences, nonLocalPreds) ;
      if (nestedAggInSubQ)
        subQGby->selectionPred().getReferencedPredicates
          (outerReferences, nonLocalPreds);

      if (!nonLocalPreds.isEmpty())
        {
          // Right grandchild selection pred has outer references
          // Like the case for the joinpredicates above, we need to
          // make sure we don't create a filter with aggregates in it.
          // The problem we run into is if the groupBy that produced the
          // aggregate gets moved above the join.
          if (candidateForLeftJoin ||
              oldRightGrandChild->getOperator().match(REL_ANY_LEFT_JOIN))
            {
              for ( ValueId filterVid = nonLocalPreds.init();
                    nonLocalPreds.next(filterVid) ;
                    nonLocalPreds.advance(filterVid))
                {
                  // Check to see if the filter predicates contains any
                  // aggregates, if so do not create filter
                  if (filterVid.getItemExpr()->containsAnAggregate())
                    {
                      doNotUnnest = TRUE;
                      if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
                        {
                          *CmpCommon::diags() << DgSqlCode(2997)
                            << DgString1("Subquery was not unnested. Reason: Filter preds would have contained aggregates ");
                        }
                      // One aggregate settles the decision; stop here so
                      // the same diagnostic is not reported once per
                      // offending predicate.
                      break;
                    }
                }
            }

          if ((doNotUnnest == FALSE) && candidateForLeftJoin &&
              (CmpCommon::getDefault(SUBQUERY_UNNESTING_P2) != DF_INTERNAL) &&
              ((normWARef.getLeftJoinConversionCount() >= 2)||nestedAggInSubQ))
            {
              doNotUnnest = TRUE;
              // For phase 2 we only unnest 2 subqueries
              // containing NonNullRejecting Predicates. Later we will ensure
              // that these 2 subqueries are not nested.
              if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
                {
                  if (!nestedAggInSubQ)
                    *CmpCommon::diags() << DgSqlCode(2997)
                      << DgString1("Skipping unnesting of Subquery due to NonNullRejecting Predicates in more than two subqueries");
                  else
                    *CmpCommon::diags() << DgSqlCode(2997)
                      << DgString1("Skipping unnesting of Subquery since we have both NonNullRejecting predicate and nested aggregate in subquery.");
                }
            }

          // create the filter node
          if (doNotUnnest == FALSE)
            {
              // Move the correlated predicates into a new Filter above the
              // grandchild. The Filter takes over the characteristic inputs
              // that the node directly above it had before the move.
              predFilterNode = new (CmpCommon::statementHeap())
                Filter(oldRightGrandChild);
              predFilterNode->selectionPred() += nonLocalPreds;
              oldRightGrandChild->selectionPred() -= nonLocalPreds;

              if (nestedAggInSubQ)
                {
                  subQGby->selectionPred() -= nonLocalPreds;
                  predFilterNode->getGroupAttr()->setCharacteristicInputs
                    (subQGby->getGroupAttr()->getCharacteristicInputs());
                  subQGby->recomputeOuterReferences();
                }
              else
                {
                  predFilterNode->getGroupAttr()->setCharacteristicInputs
                    (oldRightGrandChild->getGroupAttr()->getCharacteristicInputs());
                }

              oldRightGrandChild->recomputeOuterReferences();

              // If the nodes below us require the same outer references as inputs
              // as before we don't want to do the unnesting
              if (oldRightGrandChild->getGroupAttr()->getCharacteristicInputs() ==
                  predFilterNode->getGroupAttr()->getCharacteristicInputs())
                {
                  // disassociate the oldGrandChild from the Filter
                  predFilterNode->child(0) = (RelExpr *) NULL;

                  // put the predicate back.
                  // NOTE(review): with a nested GroupBy, predicates that
                  // were taken from subQGby are put back on the grandchild
                  // rather than on subQGby -- confirm this is intended.
                  oldRightGrandChild->selectionPred() += nonLocalPreds;

                  // remember that we decided not to unnest.
                  doNotUnnest = TRUE;
                  if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
                    *CmpCommon::diags() << DgSqlCode(2997)
                      << DgString1("Skipping unnesting of Subquery due to subtree below filter requires same outer references as filter");
                }
              else
                {
                  // Recompute inputs/outputs
                  oldRightGrandChild->primeGroupAttributes();
                  predFilterNode->primeGroupAttributes();

                  if (candidateForLeftJoin)
                    normWARef.incrementLeftJoinConversionCount();

                  // Splice the Filter in directly above the grandchild.
                  if (nestedAggInSubQ)
                    gbyNode->child(0)->child(0) = predFilterNode;
                  else
                    gbyNode->child(0) = predFilterNode;
                }
            }
        }
      else
        {
          // right grandchild has no outer refs in selection pred.
          // Look in the groupby node now
          if (gbyNode->selectionPred().getReferencedPredicates
                (outerReferences, nonLocalPreds) ||
              gbyNode->aggregateExpr().getReferencedPredicates
                (outerReferences, nonLocalPreds))
            {
              // we know group expr is empty as this is a scalar grby.
              // do nothing as we have something to unnest (i.e. do not set the doNotUnnest flag)
              // unless we need Phase2 and we have already marked one level.
              if (candidateForLeftJoin &&
                  (CmpCommon::getDefault(SUBQUERY_UNNESTING_P2) != DF_INTERNAL) &&
                  (normWARef.getLeftJoinConversionCount() >= 1))
                {
                  doNotUnnest = TRUE;
                  // For phase 2 we only unnest 1 level of subqueries
                  // containing NonNullRejecting Predicates
                  if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
                    *CmpCommon::diags() << DgSqlCode(2997)
                      << DgString1("Skipping unnesting of Subquery due to NonNullRejecting Predicates in more than one subquery");
                }
            }
          else
            {
              // no outer ref in grandchild's selection pred and in grby's (child) selection pred or
              // aggregate expr.
              doNotUnnest = TRUE;
              if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
                *CmpCommon::diags() << DgSqlCode(2997)
                  << DgString1("Subquery was not unnested. Reason: No Correlation found");
            }
        }
    }

  // Record a negative decision on this join and in the work area.
  if (doNotUnnest)
    {
      setCandidateForSubqueryUnnest(FALSE);
      normWARef.decrementCorrelatedSubqCount();
    }

  return ;
}
// Join::createAFilterGrandChildIfNeeded()
/* --------------------------------------------------------------------------
Join::eliminateRedundantJoin()
-----------------------------------------------------------------------------
Performs one of the following transformations, if this node is suitably marked
1) If predicates have been marked for removal
Join {selection_pred : p1,p2,p3,...pn} ----> Join {selection_pred : p3,...pn}
where p1 and p2 are equi join predicates that are known to be true due to a
foreign_key-unique_key relationship
2) If the children of the join are marked for removal
parent
| parent
Join |
/ \ ------> X
X Y
where the node Y has been marked for elimination by the synthLogPhase. Note that
instead of node Y, node X may also be marked for elimination and a similar
transformation is performed in that case too.
3) If its a left join and has been markedForElimination by the normalize phase
then
parent
| parent
LeftJoin |
/ \ ------> X
X Y
Note that in this case, it is only possible to eliminate the right child. */
RelExpr* Join::eliminateRedundantJoin(NormWA &normWARef)
{
if (getOperatorType() == REL_JOIN)
{
RelExpr *result = NULL;
GroupAttributes *ga = NULL;
selectionPred() -= getPredicatesToBeRemoved();
equiJoinPredicates_ -= getPredicatesToBeRemoved();
clearPredicatesToBeRemoved();
if ((child(1).getPtr())->markedForElimination())
{
result = child(0);
ga = child(1)->getGroupAttr();
}
else if ((child(0).getPtr())->markedForElimination())
{
result = child(1);
ga = child(0)->getGroupAttr();
}
if (result)
{
CMPASSERT(selectionPred().isEmpty() && joinPred().isEmpty());
NABoolean found = FALSE;
TableDesc *tabDesc = NULL;
const ValueIdSet &constraints = ga->getConstraints();
for (ValueId id = constraints.init();
constraints.next(id) && NOT found;
constraints.advance(id) )
{
if (id.getItemExpr()->getOperatorType() == ITM_COMP_REF_OPT_CONSTRAINT)
{
ComplementaryRefOptConstraint * compRIConstraint =
(ComplementaryRefOptConstraint *) id.getItemExpr();
if (compRIConstraint->getIsMatchedForElimination())
{
tabDesc = compRIConstraint->getTableDesc();
found = TRUE;
}
}
}
CMPASSERT(found);
const ValueIdList &allCols = tabDesc->getColumnList();
for (CollIndex i = 0; i < allCols.entries(); i++)
{
ItemExpr *ie = allCols[i].getItemExpr();
CMPASSERT(ie->getOperatorType() == ITM_BASECOLUMN)
const ValueIdSet &eic = ((BaseColumn *)ie)->getEIC();
normWARef.deleteVEGMember(((BaseColumn *)ie)->getValueId());
for (ValueId eqVid = eic.init(); eic.next(eqVid); eic.advance(eqVid))
normWARef.deleteVEGMember(eqVid);
}
return result;
}
}
else if (markedForElimination() && (getOperatorType() == REL_LEFT_JOIN))
{
TableDescList tableDescs(CmpCommon::statementHeap());
child(1)->getAllTableDescs(tableDescs);
CMPASSERT(tableDescs.entries() != 0);
normWARef.locateAndSetVEGRegion(this);
for (CollIndex j = 0; j < tableDescs.entries(); j++)
{
const ValueIdList &allCols = tableDescs[j]->getColumnList();
for (CollIndex i = 0; i < allCols.entries(); i++)
{
ItemExpr *ie = allCols[i].getItemExpr();
CMPASSERT(ie->getOperatorType() == ITM_BASECOLUMN)
const ValueIdSet &eic = ((BaseColumn *)ie)->getEIC();
normWARef.deleteVEGMember(((BaseColumn *)ie)->getValueId());
for (ValueId eqVid = eic.init(); eic.next(eqVid); eic.advance(eqVid))
normWARef.deleteVEGMember(eqVid);
}
}
normWARef.restoreOriginalVEGRegion();
return child(0) ; // outer joins
}
return this;
} // Join::eliminateRedundantJoin()
// -----------------------------------------------------------------------
// RelExpr::getAllTableDescs()
//
// Appends to tableDescs the table descriptor of every REL_SCAN and
// REL_STORED_PROC leaf in the subtree rooted at this node. Leaves of any
// other operator type contribute nothing.
// -----------------------------------------------------------------------
void RelExpr::getAllTableDescs(TableDescList &tableDescs)
{
  const Int32 numChildren = getArity();

  // Interior node: gather from each child in order; nothing is added for
  // the node itself.
  if (numChildren != 0)
    {
      for (Int32 childIx = 0; childIx < numChildren; childIx++)
        child(childIx)->getAllTableDescs(tableDescs);
      return;
    }

  // Leaf node: only scans and stored procedures carry a table descriptor
  // that is collected here.
  switch (getOperatorType())
    {
    case REL_SCAN:
      tableDescs.insert(((Scan *)this)->getTableDesc());
      break;

    case REL_STORED_PROC:
      tableDescs.insert(((TableValuedFunction *)this)->getTableDesc());
      break;

    default:
      break;
    }
}
/*-------------------------------------------------------------------------
Join::transformSemiJoin()
---------------------------------------------------------------------------
This method transforms a semi join to an inner join.
a) In the simplest case, which is enabled by default the right child is
unique in the joining column and the semi join can be simply translated
into a join. An example query is
select t1.a
from t1
where t1.b in (select t2.a
from t2) ;
Here t2.a is a unique key of table t2.
The following transformation is made
Semi Join {pred : t1.b = t2.a} Join {pred : t1.b = t2.a}
/ \ -------> / \
/ \ / \
Scan t1 Scan t2 Scan t1 Scan t2
b) If the right child is not unique in the joining column then
we transform the semijoin into an inner join followed by a groupby
as the join's right child. This transformation is enabled by default
only if the right side is an IN list or if the groupby's reduction
ratio is greater than 5.0, otherwise a CQD has to be used.
Examples:
select t1.a
from t1
where t1.b in (1,2,3,4,...,101) ;
Semi Join {pred : t1.b = InList.col} Join {pred : t1.b = InList.col}
/ \ -------> / \
/ \ / \
Scan t1 TupleList Scan t1 GroupBy {group cols: InList.col}
|
|
TupleList
select t1.a
from t1
where t1.b in (select t2.c from t2 where whatever) ;
Semi Join {pred : t1.b = t2.c } Join {pred : t1.b = t2.c}
/ \ -------> / \
/ \ / \
Scan t1 Scan t2 Scan t1 GroupBy {group cols: t2.c}
|
|
Scan t2
*/
RelExpr* Join::transformSemiJoin(NormWA& normWARef)
{
  // The SQO phase may visit this node more than once; clear the candidate
  // flag first so that this rewrite is attempted a single time only.
  setCandidateForSemiJoinTransform(FALSE);

  // Nothing to do for a correlated semijoin (semi-TSJ) or for a semijoin
  // that has no equijoin predicates at all.
  if ((getOperatorType() == REL_SEMITSJ) ||
      getEquiJoinPredicates().isEmpty())
    return this;

  // ---- Case (a): the right child is unique on the equijoin columns ----
  ValueIdSet rightEquiCols = getEquiJoinExprFromChild1();
  if ((NOT rightEquiCols.isEmpty()) &&
      child(1)->getGroupAttr()->isUnique(rightEquiCols))
  {
    // No duplicate elimination is needed: flipping the operator type from
    // semijoin to join is sufficient.
    setOperatorType(REL_JOIN);

    // An inner join keeps its predicates as selection predicates.
    selectionPred() += joinPred();
    joinPred().clear();

    return leftLinearizeJoinTree(normWARef, SEMI_JOIN_TO_INNER_JOIN);
  }

  // ---- Case (b): put a GroupBy on the equijoin columns below the join ----
  // This is attempted only when no predicate other than the equijoin
  // predicates exists AND at least one of these holds:
  //   - the inner side has no base tables (i.e. it is an IN list),
  //   - the groupby is expected to reduce the inner rows by more than
  //     SEMIJOIN_TO_INNERJOIN_REDUCTION_RATIO,
  //   - the inner row count is below SEMIJOIN_TO_INNERJOIN_INNER_ALLOWANCE,
  //   - the CQD SEMIJOIN_TO_INNERJOIN_TRANSFORMATION is ON.
  // Rationale: a data reduction might shrink the inner table of a hash join
  // (hash-semi-join sometimes eliminates duplicates itself, but not always),
  // and a plain join can be commuted, which may make a nested join
  // attractive when the number of rows is small.
  ValueIdSet nonEquiPreds;
  nonEquiPreds += joinPred();
  nonEquiPreds += selectionPred();
  nonEquiPreds -= getEquiJoinPredicates();

  EstLogPropSharedPtr rightLogProp =
    child(1)->getGroupAttr()->outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP));
  CostScalar rightRowCount = rightLogProp->getResultCardinality();
  CostScalar rightUec = rightLogProp->getAggregateUec(rightEquiCols);

  CostScalar reductionThreshold =
    ((ActiveSchemaDB()->getDefaults()).getAsDouble(SEMIJOIN_TO_INNERJOIN_REDUCTION_RATIO));
  NABoolean rightStatsAreFake =
    rightLogProp->getColStats().containsAtLeastOneFake();

  // Requires a usable UEC, real (non-fake) statistics and a rows-per-value
  // ratio above the configured threshold.
  NABoolean groupByReducesEnough =
    ((rightUec > 0) && (!rightStatsAreFake) &&
     (rightRowCount/rightUec > reductionThreshold)) ? TRUE : FALSE;

  CostScalar rightRowAllowance =
    ((ActiveSchemaDB()->getDefaults()).getAsDouble(SEMIJOIN_TO_INNERJOIN_INNER_ALLOWANCE));
  NABoolean rightIsSmall =
    ((rightRowCount < rightRowAllowance) && (!rightStatsAreFake)) ? TRUE : FALSE;

  NABoolean addGroupBy =
    (nonEquiPreds.isEmpty() &&
     ((child(1)->getGroupAttr()->getNumBaseTables() == 0) ||
      groupByReducesEnough ||
      rightIsSmall ||
      (CmpCommon::getDefault(SEMIJOIN_TO_INNERJOIN_TRANSFORMATION) == DF_ON)))
    ? TRUE : FALSE;

  // The semijoin has non-equijoin predicates, or the transformation is OFF.
  if (NOT addGroupBy)
    return this;

  CollHeap *heap = CmpCommon::statementHeap();
  setOperatorType(REL_JOIN);

  // Build the GroupBy that goes between the join and its old right child.
  GroupByAgg *dedupGrby = new (heap) GroupByAgg(child(1)->castToRelExpr());
  dedupGrby->setGroupAttr(new (heap)
                          GroupAttributes(*(child(1)->getGroupAttr())));

  GroupAttributes *dedupAttrs = dedupGrby->getGroupAttr();
  // numJoinedTables_ must be reset; the attributes may have been copied
  // from a join.
  dedupAttrs->resetNumJoinedTables(1);
  dedupAttrs->clearLogProperties();

  dedupGrby->setGroupExpr(rightEquiCols);
  child(1) = dedupGrby;

  // Synthesize logical properties for the newly inserted node.
  dedupGrby->synthLogProp(&normWARef);

  // An inner join keeps its predicates as selection predicates.
  selectionPred() += joinPred();
  joinPred().clear();

  return this;
} // Join::transformSemiJoin()
// -----------------------------------------------------------------------
// copyNode()
// Produces a copy of the top node of a RelExpr (via copyTopNode()).
// Sideffects: the old node is left unchanged.
//             The copy receives a freshly allocated GroupAttributes
//             object that is copy-constructed from the old node's one,
//             and that new object is told that the copy is its
//             expression for synthesis.
//             The copy shares the old node's RETDesc.
// -----------------------------------------------------------------------
static RelExpr * copyNode(RelExpr* oldNode, CollHeap* heap)
{
  RelExpr *clone = oldNode->copyTopNode(NULL, heap);

  GroupAttributes *clonedAttrs =
    new (heap) GroupAttributes(*(oldNode->getGroupAttr()));
  clone->setGroupAttr(clonedAttrs);

  clone->setRETDesc(oldNode->getRETDesc());
  clone->getGroupAttr()->setLogExprForSynthesis(clone);

  return clone;
} // copyNode()
// -----------------------------------------------------------------------
// copyNodeAndSetChildren()
// Like copyNode(), but the copy is additionally hooked up to the very
// same children as the original node (the children are not copied).
// Sideffects: the old node is left unchanged.
//             Everything listed for copyNode() applies here as well.
// -----------------------------------------------------------------------
static RelExpr * copyNodeAndSetChildren(RelExpr* oldNode, CollHeap* heap)
{
  RelExpr *clone = copyNode(oldNode, heap);

  Int32 childIx = 0;
  while (childIx < oldNode->getArity())
  {
    clone->child(childIx) = oldNode->child(childIx);
    childIx++;
  }

  return clone;
} // copyNodeAndSetChildren()
// -----------------------------------------------------------------------
// Join::pullUpPredsWithAggrs()
//
// Used by the PullUpGroupBy and MoveUpGroupBy transformations, in which a
// GroupBy that used to be a child of this Join becomes its parent. After
// such a move the Join can no longer evaluate predicates that reference
// the GroupBy's aggregates, so those predicates are handed over to the
// GroupBy here.
//
// grbyNode: the GroupBy that is being moved above this Join.
// mapNode:  optional MapValueIds node. When given, each selection
//           predicate is first rewritten "down" through a copy of its
//           map before it is tested against the aggregates.
//
// Sideffects: selection predicates of this Join that reference aggregates
//             of grbyNode are moved into grbyNode's selection predicates.
//             With a mapNode, a map entry may be added to it as well.
//
// Returns: TRUE:  the move is possible (predicates, if any, were moved).
//          FALSE: the join predicates reference aggregates of grbyNode.
//                 Join predicates cannot be pulled up, so the caller must
//                 not put the GroupBy on top of this Join.
// -----------------------------------------------------------------------
NABoolean Join::pullUpPredsWithAggrs(GroupByAgg* grbyNode, MapValueIds * mapNode)
{
  // Join predicates (relevant for Left Joins too) can never be pulled up.
  // If any of them references an aggregate, report failure.
  if (NOT joinPred().isEmpty())
  {
    ValueIdSet joinPredsOnAggrs;
    if (joinPred().getReferencedPredicates
          (grbyNode->aggregateExpr(), joinPredsOnAggrs))
    {
      // This subquery is skipped.
      if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
        *CmpCommon::diags() << DgSqlCode(2997)
          << DgString1("Subquery was not unnested. Reason: Join has aggregates in its predicates.");
      return FALSE;
    }
  }

  // No selection predicates means there is nothing to move.
  if (selectionPred().isEmpty())
    return TRUE;

  // Without a map the selection predicates can be tested directly.
  if (mapNode == NULL)
  {
    ValueIdSet selPredsOnAggrs;
    if (selectionPred().getReferencedPredicates
          (grbyNode->aggregateExpr(), selPredsOnAggrs))
    {
      selectionPred() -= selPredsOnAggrs;
      grbyNode->selectionPred() += selPredsOnAggrs;
    }
    return TRUE;
  }

  // With a map: work on a copy of the map and look at each selection
  // predicate in its rewritten (bottom) form.
  ValueIdMap *mapCopy = new (CmpCommon::statementHeap())
    ValueIdMap(mapNode->getMap());

  for (ValueId vid = selectionPred().init();
       selectionPred().next(vid);
       selectionPred().advance(vid))
  {
    ValueId loweredVid;
    mapCopy->rewriteValueIdDown(vid, loweredVid);

    // Only predicates that the map actually changes are of interest.
    if (vid == loweredVid)
      continue;

    ValueId unusedVid;
    ValueIdSet loweredPred(loweredVid);
    ValueIdSet loweredPredsOnAggrs;
    if (NOT loweredPred.getReferencedPredicates
              (grbyNode->aggregateExpr(), loweredPredsOnAggrs))
      continue;

    selectionPred() -= vid;
    grbyNode->selectionPred() += loweredPredsOnAggrs;

    // If this Join's characteristic outputs reference the original
    // value, the MapValueIds node needs an entry for it.
    if (getGroupAttr()->getCharacteristicOutputs().
          referencesTheGivenValue(vid, unusedVid, FALSE, FALSE))
      mapNode->addMapEntry(vid, loweredVid);
  }

  return TRUE;
} // Join::pullUpPredsWithAggrs()
// -----------------------------------------------------------------------
// GroupByAgg::computeGroupExpr()
//
// The group expression for a pulledUp or movedUp GroupBy node
// is computed from a seed ValueIdSet and a superSet ValueIdSet.
// The seed valueId set is that starting set of values that need
// to be in the groupExpr. For the pullUpGroupBy transformation this
// is the set of uniqueCols from the left child. The superSet is then
// used to add more values to the groupExpr. Items in the output or
// having clause that are referenced by items in the superset are also
// added to the groupExpr.
//
// Sideffects: Changes the groupBy's group expression
// -----------------------------------------------------------------------
void GroupByAgg::computeGroupExpr(const ValueIdSet& seed,
ValueIdSet& superSet,
NormWA& normWARef)
{
ValueIdSet duplicates;
ValueIdSet reqGrpValues = seed ;
reqGrpValues += leftUniqueExpr() ;
reqGrpValues.accumulateReferencedValues(
superSet, selectionPred());
reqGrpValues.accumulateReferencedValues(
superSet, getGroupAttr()->getCharacteristicOutputs());
// Need to make sure we retain original groupExpr() for
// cases where we have a semijoin->GroupBy->Filter
// In this case the group Expression will not be empty
// initially like it is for ScalarAggs, and so we have to
// make sure we keep it. However due to moveUpGroupByTransformation
// we have to make sure we remove duplicates..
duplicates = reqGrpValues.intersect(groupExpr());
if (duplicates.isEmpty())
reqGrpValues += groupExpr();
else
reqGrpValues -= duplicates;
addGroupExpr(reqGrpValues);
groupExpr().normalizeNode(normWARef) ;
} // GroupByAgg::computeGroupExpr()
/*-----------------------------------------------------------------------
Join::pullUpGroupByTransformation()
// The PullUpGroupBy transformation is one of the two main transformations
// applied while unnesting a subquery. For a single level subquery this is
// the only transformation required for subquery unnesting.
// X and Y denote arbitrary RelExprs.
// The TSJ has to be one introduced while flattening out a subquery
// in the Transform phase. Under some circumstance the TSJ can be transformed
// into a Join by the time it gets to this method. The Filter node is
// introduced during Normalization to prevent pushdown of predicates with
// outerReferences
//
// TSJ GroupBy {pred3}(grouping cols:
// / \ | cluster_key of X (leftUniqueCols)+
// / \ | other necessary columns of X)
// X ScalarAgg {pred3} --> Join {pred2}
// | / \
// | / \
// Filter {pred2} X Y {pred1}
// |
// |
// Y {pred1}
//
//The same tree as above but in terms of the local variables used in the code below
//
// this newGrby {pred3}(grouping cols:
// / \ | cluster_key of newLeftChild (leftUniqueCols)+
// / \ | other necessary columns of newLeftChild)
//oldLeft oldGB {pred3}
//Child --> newJoin {pred2}
// | / \
// | / \
// Filter {pred2} newLeft newRight {pred1}
// | Child Child
// |
// oldGBGrandChild {pred1}
//
//
// Expects: RelExpr tree as seen above to the left.
// Sideffects: if successful, returns a new groupBy with the
// a copy of join as the child. The original tree has not changed.
// The predicates in the new groupBy and the new Join will have
// changed according to the comments above.
//
// If there is an explicit groupby in the subquery the transformation above is extended as
// TSJ GroupBy {pred3,agg2(agg1)}(grouping cols:
// / \ | cluster_key of X (leftUniqueCols)+
// / \ | other necessary columns of X)
// X ScalarAgg {pred3} --> SubQ_GroupBy {agg1} (grouping cols: g1 +
// | {agg2(agg1)} | cluster_key of X (leftUniqueCols) +
// | | other necessary columns of X)
// SubQ_GroupBy {agg1} newJoin {pred2}
// | {grouping cols: g1} / \
// | / \
// Filter {pred2} newLeft newRight {pred1}
// | Child Child
// |
// Y {pred1}
//
// If there is an explicit groupby in the subquery then the flag nestedAggInSubQ is set.
------------------------------------------------------------------------------*/
GroupByAgg* Join::pullUpGroupByTransformation(NormWA& normWARef)
{
// Builds a new GroupBy-over-Join tree from copies of this Join, its left
// child, the scalar aggregate (child(1)) and the node below the Filter.
// Returns the new top GroupBy on success. Returns NULL when the subquery
// cannot be unnested this way; on every NULL path eliminateFilterChild()
// is called on filterParent first.
// NOTE(review): eliminateFilterChild() presumably removes the Filter node
// that normalization inserted below filterParent - confirm in RelExpr.
CollHeap *stmtHeap = CmpCommon::statementHeap() ;
RelExpr *oldGB = child(1)->castToRelExpr();
// note that typically child of oldGB is actually a Filter node, here
// oldGBgrandchild is the child of oldGB before the Filter was added.
RelExpr *oldGBgrandchild ;
NABoolean nestedAggInSubQ = FALSE;
// Three shapes are recognized below oldGB:
//   oldGB -> GroupBy -> Filter -> Y   (explicit groupby in the subquery)
//   oldGB -> Filter -> Y
//   oldGB -> Y                        (no Filter present)
if ((oldGB->child(0)->getOperatorType() == REL_GROUPBY) &&
(oldGB->child(0)->child(0)->getOperatorType() == REL_FILTER))
{
oldGBgrandchild = oldGB->child(0)->child(0)->child(0)->castToRelExpr();
nestedAggInSubQ = TRUE;
}
else if (oldGB->child(0)->getOperatorType() == REL_FILTER)
oldGBgrandchild = oldGB->child(0)->child(0)->castToRelExpr();
else
oldGBgrandchild = oldGB->child(0)->castToRelExpr();
// filterParent is the node directly above the Filter: the subquery's own
// GroupBy in the nested-aggregate shape, oldGB otherwise.
RelExpr *filterParent = nestedAggInSubQ ?
oldGB->child(0)->castToRelExpr() : oldGB;
RelExpr *oldLeftChild = child(0)->castToRelExpr();
// Determine a set of unique columns for the left sub-tree.
// Note: Scans and joins synthesize uniqueness constraints even for
// columns that are not in the characteristic outputs. Other
// operators such as groupby or union don't. We make use of these
// extra uniqueness constraints here. Any needed columns not yet
// added to the characteristic outputs will be added later, in
// method getMoreOutputsIfPossible().
ValueIdSet leftUniqueCols ;
if (NOT (child(0)->getGroupAttr()->findUniqueCols(leftUniqueCols)))
{
// Could not find a set of unique cols.
// If the left sub-tree contains a UNION/TRANSPOSE/SEQUENCE or SAMPLE
// then we will fail to unnest the subquery for this reason.
filterParent->eliminateFilterChild();
// left child does not have a unique constraint
// cannot unnest this subquery
if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
*CmpCommon::diags() << DgSqlCode(2997)
<< DgString1("Subquery was not unnested. Reason: Left child does not have a unique constraint");
// Things to consider (referring to the picture above): If all of the
// following are true:
// * {pred2} has only equals/VEG predicates of the form X.col = Y.col
// * {aggr} does not have any outer references
// * {pred3} does not have any outer references
//
// then we could do an alternative transformation, not yet implemented:
//
// TSJ Join {pred2: X.a=Y.b, ...}
// / \ / \
// / \ / \
// X ScalarAgg {pred3} --> X grby {Y.b, ...} {pred3}
// | {aggr} \ {aggr}
// | \
// Filter {pred2: X.a=Y.b, ...} Y {pred1}
// |
// |
// Y {pred1}
//
// Pros: - The groupby is already at a place where it will likely
// end up in the optimal plan
// Cons: - We don't get a nice join backbone with all base tables
//
// Cases where we could attempt this transformation:
// - We fail to find a unique key for X (i.e. we reach here)
// - pred2 has a very high selectivity, making newJoin (in the picture
// at the top of this method) similar to a cartesian product
return NULL ;
}
// If the subquery needs left joins, some additional checks are done here
// to see if the pull up groupby transformation can be done while
// preserving semantic correctness. No changes for left joins or for
// preserving nulls are made in this method.
if (candidateForSubqueryLeftJoinConversion())
{
if (NOT selectionPred().isEmpty())
{
// Selection predicates in a Join that needs to be converted to a Left Join
// can be tricky, particularly if they contain aggregates.
// We skip such a subquery for now.
if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
{
*CmpCommon::diags() << DgSqlCode(2997)
<< DgString1("Subquery was not unnested. Reason: Join with selectionPreds cannot be converted to LeftJoin.");
}
filterParent->eliminateFilterChild();
return NULL ;
}
}
// Make copies of the GroupBy, the Join, and the Join's left and right
// children before making any changes. All changes will be made on the
// copied nodes. If for some reason unnesting cannot be completed, NULL
// is returned and the copies are simply not used.
// copy the left child of Join
RelExpr * newLeftChild = copyNodeAndSetChildren(oldLeftChild, stmtHeap);
// copy the right child of Join
RelExpr * newRightChild = copyNodeAndSetChildren(oldGBgrandchild, stmtHeap);
// copy the Join
Join * newJoin = (Join *) copyNode(this, stmtHeap);
newJoin->getGroupAttr()->clearLogProperties(); //logical prop. must be resynthesized
// New GroupBy is a copy of the old Scalar Aggregate
GroupByAgg *newGrby = (GroupByAgg *) copyNode(oldGB, stmtHeap);
// The new GroupBy takes this Join's place in the tree, so it gets this
// Join's RETDesc rather than the one copyNode() took from oldGB.
newGrby->setRETDesc(getRETDesc());
newGrby->getGroupAttr()->clearLogProperties(); //logical prop. must be resynthesized
GroupByAgg *newSubQGrby = NULL;
if (nestedAggInSubQ)
{
// Also copy the subquery's own (explicit) GroupBy.
newSubQGrby = (GroupByAgg *) copyNode(oldGB->child(0)->castToRelExpr(),
stmtHeap);
newSubQGrby->getGroupAttr()->clearLogProperties();
}
// For multi-level subqueries it is possible that this Join is
// not a TSJ, but still contains outer references. This happens
// when right child does not need any values from the left child,
// but it does need values from a parent subquery. If the
// selection predicate (or join predicate)
// of this Join needs any aggregate outputs
// from its old groupBy child, then those predicates need
// to move up to the new parent GroupBy node.
NABoolean safeToPullUpGrby;
safeToPullUpGrby = newJoin->pullUpPredsWithAggrs(newGrby);
if (NOT safeToPullUpGrby )
{
// The join predicates contain aggregates.
// Skip this subquery.
filterParent->eliminateFilterChild();
return NULL ;
}
if (nestedAggInSubQ)
{
// Same check against the aggregates of the subquery's own GroupBy.
safeToPullUpGrby = newJoin->pullUpPredsWithAggrs(newSubQGrby);
if (NOT safeToPullUpGrby )
{
filterParent->eliminateFilterChild();
return NULL ;
}
// inputs of newSubQGroupBy are same as the old
// TSJ/Join that we are replacing. Outputs are join's + aggregates
newSubQGrby->getGroupAttr()->addCharacteristicOutputs
(getGroupAttr()->getCharacteristicOutputs());
newSubQGrby->getGroupAttr()->setCharacteristicInputs
(getGroupAttr()->getCharacteristicInputs());
newSubQGrby->child(0) = newJoin ;
}
// inputs and outputs of new GroupBy are same as the old
// TSJ/Join that we are replacing
newGrby->getGroupAttr()->setCharacteristicOutputs
(getGroupAttr()->getCharacteristicOutputs());
newGrby->getGroupAttr()->setCharacteristicInputs
(getGroupAttr()->getCharacteristicInputs());
// Stack the new nodes: newGrby -> [newSubQGrby ->] newJoin
if (nestedAggInSubQ)
newGrby->child(0) = newSubQGrby;
else
newGrby->child(0) = newJoin ;
// set the grouping cols for new GroupBy
// grouping cols for new GroupBy are
// unique cols of X +
// cols of X that are needed to evaluate its selection pred. +
// cols of X that are part of the characteristic outputs
newGrby->setLeftUniqueExpr(leftUniqueCols);
ValueIdSet oldLeftChildOutputs
(oldLeftChild->getGroupAttr()->getCharacteristicOutputs());
newGrby->computeGroupExpr(
leftUniqueCols,
oldLeftChildOutputs,
normWARef
);
if (nestedAggInSubQ)
{
// The subquery's GroupBy must output, and group by, everything the
// new top GroupBy groups by (in addition to its own grouping cols).
newSubQGrby->getGroupAttr()->
addCharacteristicOutputs(newGrby->groupExpr());
newSubQGrby->computeGroupExpr(newGrby->groupExpr(),
oldLeftChildOutputs,
normWARef);
}
// The newGrby cannot be a scalar groupby under any circumstance
// So if the group expression is empty, add a constant to the
// list of the grouping columns, so that this groupby is not scalar
// i.e. does not produce a NULL value for empty groups.
if (newGrby->groupExpr().isEmpty())
{
ItemExpr *tf = new (stmtHeap) ConstValue(0);
tf->synthTypeAndValueId(TRUE);
newGrby->groupExpr() += tf->getValueId();
}
// connect newJoin to newX and newY
newJoin->child(0) = newLeftChild ;
newJoin->child(1) = newRightChild ;
// Whatever this node was (e.g. a TSJ), the copy becomes a plain join.
newJoin->setOperatorType(REL_JOIN) ;
// pull up predicates in filter to newJoin
// do not change the filter itself in case we
// decide to not unnest.
if (oldGB->child(0)->getOperatorType() == REL_FILTER)
newJoin->selectionPred() += oldGB->child(0)->castToRelExpr()->selectionPred();
else if (nestedAggInSubQ &&
oldGB->child(0)->child(0)->getOperatorType() == REL_FILTER)
newJoin->selectionPred() +=
oldGB->child(0)->child(0)->castToRelExpr()->selectionPred();
// If the new GroupBy contains any outer references (i.e. requiredInputs
// that are not provided by the user) then mark it as needing
// the MoveUpGroupBy transformation.
ValueIdSet outerReferences;
newGrby->getGroupAttr()->getCharacteristicInputs().
getOuterReferences(outerReferences);
if (NOT(outerReferences.isEmpty()))
{
if (!nestedAggInSubQ)
newGrby->setRequiresMoveUp(TRUE) ;
else
{
// Outer references together with a nested aggregate are not
// supported: give up on unnesting this subquery.
filterParent->eliminateFilterChild();
if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
*CmpCommon::diags() << DgSqlCode(2997)
<< DgString1("Subquery was not unnested. Reason: More than 1 level of nested subquery and nested aggregate are both present");
return NULL;
}
}
return newGrby ;
} // Join::pullUpGroupByTransformation()
/*-----------------------------------------------------------------------
GroupByAgg::nullPreservingTransformation()
// The Null preserving transformation is applied to the output of the
// PullUpGroupBy transformation, if the subquery has null preserving
// predicates. According to the Dayal-Murali algorithm such subqueries
// require a Left Join instead of a Join. The effect of this transformation
// is shown below
//
//
// GroupBy {pred3} MapValueId {topMap:original outputs of GroupBy
// | | bottomMap:new NullInstantiated outputs of GroupBy}
// | |
// Join {SP:pred2} ----------> GroupBy {pred3, aggregateExpr and groupExpr
// / \ | expressed in terms of nullInstantiated output of LeftJoin}
// / \ |
// X Y {pred1} LeftJoin{JP:pred2}
// / \
// / \
// X Y{pred1}
//
// The MapValueId node shown here is present only if the GroupBy has outputs
// from the right side of the Join. The aggregateExpr in the transformed GroupBy
// has new aggregates if the original aggregate contains count or oneTrue
// aggregates.
//
// The work is split into two halves: this method, which does the LeftJoin
// conversion and error checking, and nullPreserveMyExprs(), which
// does the aggregate rewriting and nullInstantiation.
//
// Expects: Child of groupBy to be a Join.
// Sideffects: If successful, will convert the join child into a LeftJoin
// with the outputs from the join's right child nullInstantiated
// and the groupBy's aggregates rewritten in terms of the
// nullInstantiated outputs of the LeftJoin and, in the case
// of the OneTrue aggregate, rewritten as a count(1) > 0 predicate.
//
// Another sideffect is that the LeftJoin now will own the
// the VEGregion of the old groupBy. Thus we have now
// changed the original query tree. We remember this in the SqoWA
// (part of the NormWA) so that we can reassign the VEGregion back
// if we have to give up on unnesting this subquery further down
// the road.
------------------------------------------------------------------------------*/
RelExpr* GroupByAgg::nullPreservingTransformation(GroupByAgg* oldGB,
                                                  NormWA& normWARef)
{
  // This GroupBy is the output of the PullUpGroupBy transformation, so its
  // child is expected to be the new Join. The operator type is asserted
  // further down, just before the Join is modified.
  Join * newJoin = (Join*) child(0)->castToRelExpr();

  // oldGBgrandchild is the child of the Filter in the original tree (or
  // the child of oldGB itself when there is no Filter). It is used because
  // its outputs are known to be correct and consistent at this point.
  RelExpr *oldGBgrandchild;
  if (oldGB->child(0)->getOperatorType() == REL_FILTER)
    oldGBgrandchild = oldGB->child(0)->child(0)->castToRelExpr();
  else
    oldGBgrandchild = oldGB->child(0)->castToRelExpr();

  // One check decides here whether the subquery can be unnested with a
  // left join: filter predicates that do not reference anything from the
  // inner side block the transformation.
  //  - This typically only happens in multilevel queries where the filter
  //    predicates are correlated on the outer tables but do not refer to
  //    anything on the inner. What has been observed with these queries
  //    is that the predicate ends up being pulled up, then pushed down
  //    again, but when it gets pushed down it ends up on the left hand
  //    side instead of the right, where it came from.
  //
  // A oneTrue aggregate (used to represent EXISTS) is NOT checked for
  // here. Its replacement by a count(1) aggregate plus a count(1) > 0
  // predicate is done in nullPreserveMyExprs(), which is called at the
  // end of this method.
  const ValueIdSet& filterPreds = newJoin->selectionPred();
  if ( (NOT filterPreds.isEmpty()) &&
       filterPreds.referencesOneValueFromTheSet
         (oldGBgrandchild->getGroupAttr()->getCharacteristicOutputs()) == FALSE )
  {
    // The predicates do not reference the inner table: give up.
    if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
    {
      *CmpCommon::diags() << DgSqlCode(2997)
        << DgString1("Subquery was not unnested. Reason: Join predicate has no references to inner table.");
    }
    oldGB->eliminateFilterChild();
    return NULL ;
  }

  // Change newJoin into a left join and move its selection predicates into
  // the join predicate. The caller has already guaranteed that all
  // selection predicates in newJoin come from the filter node and do not
  // contain any aggregates.
  CMPASSERT((newJoin->getOperatorType() == REL_JOIN) ||
            (newJoin->getOperatorType() == REL_TSJ));
  newJoin->setOperatorType(REL_LEFT_JOIN) ;
  newJoin->joinPred() = newJoin->selectionPred();
  newJoin->selectionPred().clear();

  // The left join takes over the VEG region of the old scalar aggregate
  // (subtreeId 0) as its right-child region (subtreeId 1). This changes
  // the original query tree, so the change is first recorded in the SqoWA,
  // which allows the region to be reassigned back if unnesting of this
  // subquery has to be abandoned later on.
  VEGRegion* oldGBRegion = normWARef.locateVEGRegion(oldGB,0);
  CMPASSERT (oldGBRegion != NULL);
  normWARef.getSqoWA()->insertChangedRelExpr(oldGB, newJoin,
    SQO_REASSIGNED_VREGION, oldGBRegion->getRegionId(), 0, 1);
  normWARef.reassignVEGRegion(oldGB, 0, newJoin, 1);

  // Second half: null-instantiate the right child's outputs and rewrite
  // this GroupBy's expressions in terms of them.
  return( nullPreserveMyExprs(normWARef));
} // GroupByAgg::nullPreservingTransformation()
/*-----------------------------------------------------------------------
GroupByAgg::nullPreserveMyExprs()
// This method takes care of nullInstantiate any of the outputs from
// the right child of the Left join.
//
// It also rewrites the GroupBy's expressions in terms of the newly
// nullinstantiated values.
//
// It then creates a MapValueId on top of the groupBy if the groupBy outputs
// any of those values that got NullInstantiated. This facilitates translating
// the NullInstantiated values back to their original form before we introduced
// the LeftJoin. The top part of the map has the original ValueIds in it, so
// that we do not need to rewrite the tree above us.
//
// A before and after picture is shown below:
//
//
// GroupBy {pred3} MapValueId {topMap:original outputs of GroupBy
// | | bottomMap:new NullInstantiated outputs of GroupBy}
// | |
// Join {SP:pred2} ----------> GroupBy {pred3, aggregateExpr and groupExpr
// / \ | expressed in terms of nullInstantiated output of LeftJoin}
// / \ |
// X Y {pred1} LeftJoin{JP:pred2}
// / \
// / \
// X Y{pred1}
//
// The MapValueId node shown here is present only if the GroupBy has outputs
// from the right side of the Join. The aggregateExpr in the transformed GroupBy
// has new aggregates if the original aggregate contains count or oneTrue
// aggregates.
//
// Note: It is assumed that this function is called after the GroupBy has moved
// on top of the LeftJoin!
//
// Expects: A LeftJoin as the groupBy's child
//
// Sideffects: 1) NullInstantiates outputs of the LeftJoin stemming from
// the LeftJoin's right child.
//
// 2) count(*) and count(keyCol) has already been translated into
// a count(1). Add the 1 to the LeftJoins output and
// nullInstantiate it. For a count(col), col is already part
// of the LeftJoin's output. Then change the opType of the
// count() to be of type ITM_COUNT_NONULL to take care of the
// count bug. Since we now changed the the itemExpr that is
// common to the old relExpr tree, we need to remember this
// in the SqoWA(member of NormWA) so we can undo it if we
// need to give up.
// Note we can have several counts in here..
//
// 3) EXISTS is translated earlier in the compiler to an ITM_ONE_TRUE.
// Replace the ONETRUE with a count(1) > 0 predicate (and add the
// count(1) aggregate) similarly to what we did in 2) above. Again
// we need to remember this change as we are changing an itemExpr
// common to the old tree.
// There will be only ONE special aggregate like ONE_TRUE.
//
// 4) The selection predicate, aggregate expression,
// grouping expression,leftUnique expressions and output
// expression are rewritten in terms of the nullInstantiated
// outputs from the LeftJoin.
//
// 5) if the rewritten outputs of the groupBy contains any
// nullInstantiated values from the LeftJoin, we need to
// insert a mapValueId node on top of the groupBy to translate
// between the old and the new groupBys. The tree above us
// expects the old ones.
------------------------------------------------------------------------------*/
RelExpr* GroupByAgg::nullPreserveMyExprs( NormWA& normWARef)
{
GroupByAgg * newGrby = this;
ValueId oneTrueVid;
ValueId anyTrueVid;
Join * newJoin = (Join*) child(0)->castToRelExpr();
RelExpr * joinRightChild = newJoin->child(1)->castToRelExpr();
CollHeap *stmtHeap = CmpCommon::statementHeap() ;
// For safety, in case someone calls this method out of context.
if (newJoin->getOperatorType() != REL_LEFT_JOIN)
return this;
// Get the outputs of the Left Joins right child
// we need to add a constant to it to get it nullinstantiated
// if we have a count or a oneTrue aggregate in the newGrby
ValueIdSet currentOutputs = joinRightChild->getGroupAttr()->getCharacteristicOutputs();
// Handle the count case
if (newGrby->aggregateExpr().containsCount())
{
// count(*) gets translated earlier in the compiler
// to count(1).
// If we have a count(*) situation, add the constant
// to the join's right child's output so that it can
// get nullInstantiated
// Doing so takes care of the infamous count() bug.
// We also need to make sure the count operator is of type
// ITM_COUNT_NONULL.
// count(col) works as is as long as we make sure that
// the count operator is of type ITM_COUNT_NONULL.
// In the case where col is non Nullable count(col) also
// gets translated into a count(1)
// Need to nullInstantiate any outPuts from the right side..
for ( ValueId vid = newGrby->aggregateExpr().init();
newGrby->aggregateExpr().next(vid);
newGrby->aggregateExpr().advance(vid))
{
if ((vid.getItemExpr()->origOpType() == ITM_COUNT_STAR__ORIGINALLY) ||
(vid.getItemExpr()->origOpType() == ITM_COUNT))
{
// Found a count(*) or a count(col)
// a count(*) is represented as a count(1)
// Make sure we add the constant as a fake output
// of the leftJoin so that it will be nullInstantiated.
// In the case of count(col), col is already an output from the
// leftJoin.
//
// Add the const used in count(*) expression to the joins
// output so it can be nullinstantiated.
if (vid.getItemExpr()->child(0)->getOperatorType() == ITM_CONSTANT)
{
currentOutputs += vid.getItemExpr()->child(0)->getValueId();
}
normWARef.getSqoWA()->insertChangedItemExpr(vid, SQO_NEWOPTYPE, vid.getItemExpr(),
vid.getItemExpr()->getOperatorType());
// unconditionally change the COUNT to COUNT_NONNULL
// This constant will be nullinstantiated below
vid.getItemExpr()->setOperatorType(ITM_COUNT_NONULL);
}
}
}
if ( aggregateExpr().containsOneTrue(oneTrueVid) )
{
ItemExpr *constVal = new (stmtHeap) SystemLiteral(1);
// replace the OneTrue aggreate in the newGrby selection
// predicate with the manufactured count(1) > 0 in
// the groupby selection predicate.
// Also replace the OneTrue aggregate with the
// count(1) aggregate in the newGrby's aggregate.
// create the new count(1) aggregate.
Aggregate * dummyAgg = new (stmtHeap)
Aggregate(ITM_COUNT_NONULL, constVal);
// Create the count(1)>0 predicate.
BiRelat *fakeCountPred = new (stmtHeap)
BiRelat(ITM_GREATER,
dummyAgg,
new (stmtHeap) SystemLiteral(0));
fakeCountPred->synthTypeAndValueId();
// Need to nullInstantiate any outPuts from the right side..
// Add the fake column to the output of the join's right child.
// By having the LeftJoin output the fake constant we can tell
// if the row gets nullInstantiated. If the fake constant comes
// back as NULL, we have a nullInstantiated row!
// This can only happen after we do the synthTypeAndValueId() above...
// if this line needs to move above, then you have to call
// synthTypeAndValueId on the constVal...
currentOutputs += constVal->getValueId();
// Retain the old itemExpr so that we can restore it
// if we bail from unnesting..
normWARef.getSqoWA()->insertChangedItemExpr(oneTrueVid, SQO_REPLACED,
oneTrueVid.getItemExpr());
// By using replaceItemExpr, we immediately
// fix up the newGrby's selection predicate if it
// contained the oneTrue.
oneTrueVid.replaceItemExpr(fakeCountPred);
// Fix up the aggregate.
newGrby->aggregateExpr() -= oneTrueVid;
newGrby->aggregateExpr() += dummyAgg->getValueId();
}
if (newGrby->aggregateExpr().containsAnyTrue(anyTrueVid))
{
// For the cases where the groupBy's selection predicate
// contains a AnyTrue(), we need to add in an additional
// check to also allow nullInstantiated rows to pass or to
// transform its result to be of equivalent value to that of
// the aggregate in its nested form.
// For example
//
// The following query:
// SELECT A FROM T1 WHERE B = ALL
// (SELECT T2.D FROM T2 WHERE T2.D = T1.B) OR EXISTS
// (SELECT T3.F FROM T3 WHERE T3.H > T1. B AND T3.H < T1.A)
// order by 1;
//
// Root Root
// | |
// Tsj -> Tsj
// / \ / \
// Tsj ScalAgg2 MapVid ScalAgg2
// / \ \ / \
// T1 ScalAgg T3 GroupBy T3
// \ |
// T2 LeftJoin
// / \
// T1 T2
//
// In this example, ScalAgg in the nested case produces the following
// AnyTrue() aggregate: AnyTrue(T2.D <> T1.B), which is an input to ScalAgg2
// If we have a row in T1, where T1.B is NULL, the nested ScalAgg will
// get a NO_DATA from the scan of T2 for that row, which means an empty
// group, in which case ANY_TRUE will evaluate to FALSE.
//
// In the unnested case, the same row from T1 will produce a NullInstantiated
// row when joined with T2 due to the leftJoin, thus the group passed up
// to the GroupBy (which for the unnested case also produces the same
// AnyTrue() aggregate), contains 1 row, and the AnyTrue() aggregate will
// evaluate to UNKNOWN due to the NULL value for T1.B and the NULL value
// for T2.D.
//
// To solve this problem for the unnested case, we add a fake constant
// to the output of the LeftJoin, and augment the anyTrue predicate to be
// AnyTrue(T2.D <> T1.B AND NOT IsNull(fakeConst)).
//
//
// We have a similar problem when the groupBy contains a Not AnyTrue()
// selection predicate as a result of a translation of an ALL expression to
// a NOT ANY.
// Create the fake constant
ItemExpr *constVal = new (stmtHeap) SystemLiteral(1);
constVal->synthTypeAndValueId();
// Need to add the fake constant to the group Expression
// since the expression we are ANDing in is not part of the
// aggregate expression .
newGrby->groupExpr() += constVal->getValueId();
// Create the IS NULL predicate
UnLogic *newIsNullPred = new (stmtHeap)
UnLogic(ITM_IS_NULL, constVal);
newIsNullPred->synthTypeAndValueId();
ItemExpr *anyTrueExpr = anyTrueVid.getItemExpr()->child(0);
// Create the Not IS NULL predicate
UnLogic *newNotPred = new (stmtHeap) UnLogic(ITM_NOT, newIsNullPred);
newNotPred->synthTypeAndValueId();
// AND it with the existing AnyTrue predicate..
BiLogic *newPred = new (stmtHeap) BiLogic(ITM_AND,
anyTrueExpr,
newNotPred);
newPred->synthTypeAndValueId(TRUE);
// Remember what we changed, so it can be restored if we need to back out..
normWARef.getSqoWA()->insertChangedItemExpr(
anyTrueVid,
SQO_NEWCHILD,
anyTrueExpr,
0);
// assign the new predicate to the AnyTrue node.
anyTrueVid.getItemExpr()->child(0) = newPred;
// Need to nullInstantiate any outputs from the right side..
// Add the fake column to the output of the left join.
// By having the LeftJoin output the fake constant we can tell
// if the row gets nullInstantiated. If the fake constant comes
// back as NULL, we have a nullInstantiated row!
// We facilitate this by adding the constant to the LeftJoins
// nullInstantiatedOutput list.
currentOutputs += constVal->getValueId();
}
// NullInstantiate the output from the newJoin's right child.
ValueIdList &nullOutputList = newJoin->nullInstantiatedOutput();
BindWA bindWA(ActiveSchemaDB(), CmpCommon::context());
for (ValueId exprId = currentOutputs.init();
currentOutputs.next(exprId);
currentOutputs.advance(exprId))
{
ValueId nullId = exprId.nullInstantiate(&bindWA,TRUE);
nullOutputList.insert(nullId);
}
newJoin->normalizeNullInstantiatedOutput(normWARef);
ValueIdSet aggExprRewritten, selPredsRewritten;
ValueIdSet leftUniqueExprRewritten, grpExprRewritten;
const ValueIdSet &selPreds = newGrby->getSelectionPred();
const ValueIdSet &grpExpr = newGrby->groupExpr();
const ValueIdSet &aggExpr = newGrby->aggregateExpr();
const ValueIdSet &leftUniqueExpr = newGrby->leftUniqueExpr();
// Create a copy of the newJoins map so that our rewrites
// do not have unwanted sideffects where the join might
// put any of the newGrby aggregates in its output.
ValueIdMap *rightChildMap = new (stmtHeap)
ValueIdMap(newJoin->rightChildMapForLeftJoin());
// This is kind of counter intuitive.
// We the top part of the map contains the nullInstantiated
// predicates, which we want to use in the groupBy's
// aggregate expression, thus the use of rewriteValueIdSetUp,
// and the reversal of the arguments...
rightChildMap->rewriteValueIdSetUp(aggExprRewritten,aggExpr);
// Now we need to look for count(1) (same as count(*)
// and substitute that with the nullinstantiation of the
// const column.
newGrby->aggregateExpr() = aggExprRewritten;
// Remap the selection predicate as well.
rightChildMap->rewriteValueIdSetUp(selPredsRewritten,selPreds);
newGrby->setSelectionPredicates(selPredsRewritten);
// Remap the group expression as well.
rightChildMap->rewriteValueIdSetUp(grpExprRewritten,grpExpr);
newGrby->groupExpr() = grpExprRewritten;
// Remap the leftUnique expression as well. Needed for when we move
// above a Left Join
rightChildMap->rewriteValueIdSetUp(leftUniqueExprRewritten,leftUniqueExpr);
newGrby->leftUniqueExpr() = leftUniqueExprRewritten;
newGrby->getGroupAttr()->normalizeInputsAndOutputs(normWARef);
// Create a MapValueID Node on top of the
// GroupBy that map between the old join
// output used above and the new GroupBy output
NABoolean mapNeeded = FALSE;
ValueIdSet rewrittenGbyOutputs;
ValueIdSet gbyOutputs = newGrby->getGroupAttr()->getCharacteristicOutputs();
ValueIdMap *map = new (stmtHeap) ValueIdMap;
// cannot use the rewriteValueIdSetUp routine for the outputs,
// as we need to construct a map for the outputs and the ValueIdSets
// used in rewriteValueIdSetUp() do not retain the order of the Vids
// using a ValueIdList doesn't help either as the order is reversed,
// and it seemed like a bad idea to rely on that to never change.
for (ValueId topMapId = gbyOutputs.init();
gbyOutputs.next(topMapId); gbyOutputs.advance(topMapId))
{
ValueId bottomMapId;
rightChildMap->rewriteValueIdUp(bottomMapId,topMapId);
// Only if our outputs will actually be different, do we want to
// create a map.
// One would think that it should be ok to add elemets to the map
// that have the same value in both the upper and lower part,
// but that ends up producing incorrect output.... so we only
// add elements that are
// different.
if ( topMapId != bottomMapId )
{
mapNeeded = TRUE;
}
// Add a new mapping entry for the MapValeIds node.
map->addMapEntry(topMapId, bottomMapId);
rewrittenGbyOutputs += bottomMapId;
}
newGrby->getGroupAttr()->setCharacteristicOutputs(rewrittenGbyOutputs);
// For phase 3 we need to remember that we created a map so
// If we are moving a GroupBy on top of a LeftJoin (that we already
// have converted), we don't want to create an additional map. This
// since the first map already maps any of the output from this left
// join.
if ( mapNeeded )
{
MapValueIds * newMap = newGrby->buildMapValueIdNode(map);
return newMap;
}
return newGrby;
} // GroupByAgg::nullPreserveMyExprs()
/*-----------------------------------------------------------------------------
// Join::moveUpGroupByTransformation()
// MoveUp GroupBy transformation. Relevant only for subqueries with
// two or more levels of nesting. For a two level subquery, at this stage
// the new tree looks like
// MovedUpGroupByTail(newJoin(X2,moveUpGroupBy(Y2))).
// If the selection pred. of moveUpGroupBy and/or Y2 contain outer references
// those predicates will have to be pulled up so that newJoin does not have
// to be a TSJ. The first step in this process is to apply the MoveUpGroupBy
// transformation which will change the new tree to
// MovedUpGroupByTail(moveUpGroupBy(newJoin(X2,Y2))).
// movedUpGrbyTail(newGrby) movedUpGrbyTail(newGrby)
// | |
// | |
// newJoin moveUpGroupBy
// / \ |
// / \ ------> |
// X2 moveUpGroupBy newJoin
// | / \
// | / \
// Y2 X2 Y2
// If Y2 is a GroupBy that has been marked for moveUp (which can happen if we
// have more than 2 levels of nesting), then in the next iteration through the
// while loop below the old Y2 will become the new moveUpGrby and the old
// moveUpGroupBy will become the new movedUpGrbyTail.
// If the query has N levels of nesting, we may have to move N-1 GroupBy
// nodes over the newly introduced Join.
// If subquery unnesting has introduced Left Joins and MapValueId nodes
// through the NullPreservingTransformation, then the moveUpGroupTransformation
// is slightly different from the figure shown above. If MapValueId nodes are
// present the transformation will be as shown below. Note that newJoin can be
// a regular Join or a LeftJoin. Note that in phase 2 we allow at most one
// LeftJoin to be introduced by subquery unnesting per query, thus there can
// be at most one MapValueId node introduced by subquery unnesting.
// The transformation shown below will occur at most once per query.
// In phase 3 this restriction will go away as we will then be able to unnest
// multiple subqueries that requires a Left Join.
// movedUpGrbyTail(topGrby) movedUpGrbyTail(topGrby)
// | |
// | |
// newJoin moveUpMap
// / \ |
// / \ ------> |
// X2 moveUpMap moveUpGroupBy
// | |
// | |
// moveUpGroupBy newJoin
// | / \
// | / \
// Y2 X2 Y2
//
//
// Expects: child(1) to be a GroupBy with the moveUp flag set, or a mapValueId
// with a GroupBy child that has the moveUp flag set.
//
// Side effects: If successful, returns a pointer to a groupBy that
//               is a copy of the groupBy marked for moveUp which now
//               has this join as its child. This new groupby will also
//               have its grouping expression altered, as well as its inputs
//               and outputs. The groupBy's selection predicate will also
//               contain any of the join's selection predicates that contained
//               an aggregate from the original groupBy.
//               Returns topGrby unchanged if no groupBy needs to be moved,
//               and NULL if the transformation cannot be applied.
-------------------------------------------------------------------------------*/
GroupByAgg* Join::moveUpGroupByTransformation(GroupByAgg* topGrby,
                                              NormWA & normWARef)
{
  // Empty value set, used to reset characteristic inputs and as the
  // (empty) predicate set when pushing down from a MapValueIds node.
  ValueIdSet noValues;
  CollHeap *heap = CmpCommon::statementHeap();

  // The lowest GroupBy already sitting above this join. Each GroupBy
  // (or MapValueIds + GroupBy pair) moved below becomes its child(0).
  GroupByAgg *movedUpGrbyTail = topGrby;

  // Current right child of this join; re-pointed on every iteration.
  RelExpr *rightSide = child(1)->castToRelExpr();

  while (TRUE)
    {
      // Find the GroupBy that may have to move: it is either the join's
      // right child itself, or the child of a MapValueIds node that is
      // the join's right child.
      const NABoolean rightIsMap =
        (rightSide->getOperatorType() == REL_MAP_VALUEIDS);
      GroupByAgg *pendingGrby = NULL;

      if (rightSide->getOperatorType() == REL_GROUPBY)
        pendingGrby = (GroupByAgg *) rightSide;
      else if (rightIsMap &&
               (rightSide->child(0)->getOperatorType() == REL_GROUPBY))
        pendingGrby = (GroupByAgg *) rightSide->child(0)->castToRelExpr();

      // Done as soon as there is no GroupBy still flagged for moveUp.
      if ((pendingGrby == NULL) || NOT pendingGrby->requiresMoveUp())
        break;

      if (isLeftJoin())
        {
          // We do not want to pull the groupBy above the left join
          // as it will restrict tuples from the left side of the left
          // join to flow up. This needs to be revisited for phase 3
          // when we transform to a Left Leaning tree when we have
          // LeftJoins.
          if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
            *CmpCommon::diags() << DgSqlCode(2997)
              << DgString1("Subquery was not unnested. Reason: Can not move groupBy above LeftJoin.");
          return NULL;
        }

      // Work on copies of the nodes that are being moved; the copies get
      // their logical properties cleared.
      MapValueIds *moveUpMap = NULL;
      GroupByAgg *moveUpGrby = NULL;
      if (rightIsMap)
        {
          moveUpMap = (MapValueIds *) copyNodeAndSetChildren(rightSide, heap);
          moveUpGrby = (GroupByAgg *) copyNodeAndSetChildren(
                                        rightSide->child(0)->castToRelExpr(),
                                        heap);
          moveUpMap->child(0) = moveUpGrby;
          moveUpMap->getGroupAttr()->clearLogProperties();
        }
      else
        {
          moveUpGrby = (GroupByAgg *) copyNodeAndSetChildren(rightSide, heap);
        }
      moveUpGrby->getGroupAttr()->clearLogProperties();

      // The GroupBy's old child (copied) becomes the join's new right child.
      rightSide = copyNodeAndSetChildren(
                    moveUpGrby->child(0)->castToRelExpr(),
                    heap);

      // The join may have predicates that reference aggregates computed by
      // moveUpGrby; if so they are pulled into moveUpGrby. If that is not
      // possible, skip this subquery.
      if (NOT pullUpPredsWithAggrs(moveUpGrby, moveUpMap))
        return NULL;

      // Re-link the tree: tail -> [map ->] moveUpGrby -> this join.
      child(1) = rightSide;
      moveUpGrby->child(0) = this;
      if (moveUpMap == NULL)
        movedUpGrbyTail->child(0) = moveUpGrby;
      else
        movedUpGrbyTail->child(0) = moveUpMap;

      // Set up inputs and outputs of moveUpGrby taking into account
      // its new location in the query tree.
      moveUpGrby->addGroupExpr(movedUpGrbyTail->groupExpr());
      moveUpGrby->getGroupAttr()->setCharacteristicInputs(noValues);
      moveUpGrby->primeGroupAttributes();
      moveUpGrby->getGroupAttr()->normalizeInputsAndOutputs(normWARef);

      // Need to make sure we nullInstantiate anything that we need from
      // the right hand side, as we may have just moved over a Left Join.
      // This function is a no-op if our child is not a LeftJoin.
      if (moveUpGrby->nullPreserveMyExprs(normWARef) == NULL)
        return NULL;

      if (moveUpMap != NULL)
        {
          moveUpMap->getGroupAttr()->setCharacteristicInputs(noValues);
          moveUpMap->primeGroupAttributes();
          moveUpMap->getGroupAttr()->normalizeInputsAndOutputs(normWARef);
          moveUpMap->pushdownCoveredExpr(
            moveUpMap->getGroupAttr()->getCharacteristicOutputs(),
            moveUpMap->getGroupAttr()->getCharacteristicInputs(),
            noValues);
        }

      movedUpGrbyTail->pushdownCoveredExpr(
        movedUpGrbyTail->getGroupAttr()->getCharacteristicOutputs(),
        movedUpGrbyTail->getGroupAttr()->getCharacteristicInputs(),
        movedUpGrbyTail->selectionPred());

      // Sometimes the moveUpMap ends up being empty after being moved
      // on top of a Join. Eliminate it if we don't need it, otherwise
      // it will impede output flow.
      if ((moveUpMap != NULL) &&
          moveUpMap->getGroupAttr()->getCharacteristicOutputs().isEmpty())
        {
          movedUpGrbyTail->child(0) = moveUpMap->child(0);

          // Recompute inputs and outputs of moveUpGrby for its new
          // position directly below the tail.
          moveUpGrby->addGroupExpr(movedUpGrbyTail->groupExpr());
          moveUpGrby->getGroupAttr()->setCharacteristicInputs(noValues);
          moveUpGrby->primeGroupAttributes();
          moveUpGrby->getGroupAttr()->normalizeInputsAndOutputs(normWARef);

          // Repush from the tail now that the map is gone.
          movedUpGrbyTail->pushdownCoveredExpr(
            movedUpGrbyTail->getGroupAttr()->getCharacteristicOutputs(),
            movedUpGrbyTail->getGroupAttr()->getCharacteristicInputs(),
            movedUpGrbyTail->selectionPred());
        }

      // Does moveUpGrby still have outer references? If not, it need
      // not "move over" any more Join nodes.
      ValueIdSet remainingOuterRefs;
      moveUpGrby->getGroupAttr()->getCharacteristicInputs().
        getOuterReferences(remainingOuterRefs);
      if (remainingOuterRefs.isEmpty())
        moveUpGrby->setRequiresMoveUp(FALSE);

      // moveUpGrby will have grouping cols of movedUpGrbyTail +
      // left unique cols that were computed for it previously +
      // cols needed to provide its outputs +
      // cols needed to compute its selection pred.
      // The superset (second param) for this call is the current
      // value of the groupExpr. In other words the aim of the call below
      // is to see if the groupExpr for moveUpGrby can be reduced
      // from the setting that was done a few lines earlier.
      moveUpGrby->computeGroupExpr(movedUpGrbyTail->groupExpr(),
                                   moveUpGrby->groupExpr(),
                                   normWARef);

      // moveUpGrby becomes the new tail in case there are more
      // GroupBys that need to be moved up.
      movedUpGrbyTail = moveUpGrby;
    }

  // End of MoveUpGroupBy transformation.
  // Note that if the specific pattern of GroupBy and Joins shown here
  // is not present then this transformation will not be applied and the
  // right subtree of the new Join will contain outer references. This
  // will cause unnesting this TSJ/Join to fail below, and we will revert
  // to the original nested tree for this subquery level.
  return movedUpGrbyTail;
} // Join::moveUpGroupByTransformation()
/*----------------------------------------------------------------------------
// GroupByAgg::subqueryUnnestFinalize()
// set up inputs/outputs of the new Join, its children
// and the newJoin's parent GroupBy.
// move selection predicates to the appropriate nodes.
// return FALSE if any outer references remain or if
// sufficient outputs cannot be produced to compute left side's
// unique columns
//
// Expects: child(0) to be the new Join, or a GroupBy whose child(0) is
//          the new Join.
// Side effects: recomputes inputs and outputs of the join's children.
//               pulls predicates containing outer references up from the
//               join's children into the join.
//               recomputes inputs and outputs of the join.
//               pushes any of the groupBy's predicates down that can go down.
//               pushes any of the join's predicates down that can go down.
-------------------------------------------------------------------------------*/
NABoolean GroupByAgg::subqueryUnnestFinalize(ValueIdSet& newGrbyGroupExpr,
                                             NormWA& normWARef)
{
  // Locate the new Join: it is child(0), unless child(0) is a GroupBy,
  // in which case the Join is that GroupBy's child(0).
  RelExpr *belowMe = child(0)->castToRelExpr();
  Join *newJoin = NULL;
  if (belowMe->getOperatorType() == REL_GROUPBY)
    newJoin = (Join *) belowMe->child(0)->castToRelExpr();
  else
    newJoin = (Join *) belowMe;

  // Recompute the group attributes of both join children.
  RelExpr *joinLeft  = newJoin->child(0)->castToRelExpr();
  RelExpr *joinRight = newJoin->child(1)->castToRelExpr();
  joinLeft->primeGroupAttributes();
  joinRight->primeGroupAttributes();
  joinLeft->getGroupAttr()->normalizeInputsAndOutputs(normWARef);
  joinRight->getGroupAttr()->normalizeInputsAndOutputs(normWARef);

  // usableInputs is the required inputs of the newJoin (plus those of its
  // right child) minus any outer references. These outer references are
  // not really available as our intention is to apply this transformation
  // at successive levels and unnest all subqueries.
  ValueIdSet usableInputs = newJoin->getGroupAttr()->getCharacteristicInputs();
  usableInputs += joinRight->getGroupAttr()->getCharacteristicInputs();
  ValueIdSet outerRefs;
  usableInputs.getOuterReferences(outerRefs);
  usableInputs -= outerRefs;

  // --------------------------------------------------------------------
  // For each join child, check whether its selection predicate contains
  // outer references. If it does, pull those predicates up into the join:
  // into the join predicate for the right child of a left join, into the
  // join's selection predicate otherwise.
  // --------------------------------------------------------------------
  ValueIdSet correlatedPreds;
  for (Int32 side = 0; side < 2; side++)
    {
      if (NOT newJoin->child(side)->selectionPred().getReferencedPredicates
                (outerRefs, correlatedPreds))
        continue;

      if ((side == 1) && newJoin->isLeftJoin())
        newJoin->joinPred() += correlatedPreds;
      else
        newJoin->selectionPred() += correlatedPreds;

      newJoin->child(side)->selectionPred() -= correlatedPreds;
      newJoin->child(side)->recomputeOuterReferences();
      correlatedPreds.clear();
    }

  // Compute the Join's inputs/outputs.
  newJoin->primeGroupAttributes();
  newJoin->getGroupAttr()->normalizeInputsAndOutputs(normWARef);

  // Push down any of this GroupBy's predicates that we can.
  pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
                      getGroupAttr()->getCharacteristicInputs(),
                      selectionPred());

  // Rules for pushdown from the Join during this transformation differ
  // from the usual ones: if the left child does not cover any part of a
  // VEGPred, the predicate is still retained in the Join, so that it can
  // be pulled further up the query tree as we apply this transformation
  // at other levels. In the usual rules, the VEGPred would be pushed down
  // to the right child without being retained at the Join.
  ValueIdSet noValues;
  ValueIdSet joinOutputs = newJoin->getGroupAttr()->getCharacteristicOutputs();
  newJoin->pushdownCoveredExprSQO(joinOutputs,
                                  usableInputs,
                                  newJoin->selectionPred(),
                                  noValues,
                                  TRUE,  // keepPredsNotCoveredByChild0
                                  TRUE); // keepPredsNotCoveredByChild1

  // Check if the right child still contains outer references. If so,
  // this subquery level cannot be unnested. Give up and return the
  // old TSJ. Note that other subquery levels may still be
  // successfully unnested.
  outerRefs.clear();
  joinRight->getGroupAttr()->getCharacteristicInputs().
    getOuterReferences(outerRefs);
  if (NOT(outerRefs.isEmpty()))
    {
      // right child still has outer references
      // cannot unnest this subquery
      if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
        *CmpCommon::diags() << DgSqlCode(2997)
          << DgString1("Subquery was not unnested. Reason: Right child has outer references that cannot be removed by current unnesting.");
      return FALSE;
    }

  // Is the Join producing all the outputs needed for the new grouping
  // columns of the GroupBy? If not, make a tree walk down the left subtree
  // and increase outputs as needed at various child levels so that this
  // Join can produce the needed values. If that fails, unnesting is not
  // possible at this level.
  ValueIdSet missingOutputs = newGrbyGroupExpr;
  missingOutputs -= newJoin->getGroupAttr()->getCharacteristicOutputs();
  // Copy taken before the call below; this copy is what gets added to the
  // join's outputs on success.
  ValueIdSet outputsToAdd = missingOutputs;

  if (NOT newJoin->getMoreOutputsIfPossible(missingOutputs))
    {
      // left sub-tree cannot produce additional columns required to group
      // by the left unique cols. Cannot unnest this subquery.
      // Can occur if left-subtree contains UNION, TRANSPOSE, SEQUENCE or SAMPLE
      if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
        *CmpCommon::diags() << DgSqlCode(2997)
          << DgString1("Subquery was not unnested. Reason: Left subtree cannot produce output values required for grouping.");
      return FALSE;
    }

  newJoin->getGroupAttr()->addCharacteristicOutputs(outputsToAdd);
  return TRUE;
} // GroupByAgg::subqueryUnnestFinalize()
/*----------------------------------------------------------------------------
// Join::applyInnerKeyedAccessHeuristic()
//
// Checks to see if the join predicate is on a key column of the inner table
// and the key column is the leading key column.
//
// Expects: a child chain like this:
// GroupBy->Filter->Scan
//
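// Side effects: When the heuristic applies (TRUE is returned), the Filter
//               child of child(1) is removed via eliminateFilterChild().
//               Otherwise nothing is changed.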
-------------------------------------------------------------------------------*/
// Decides whether subquery unnesting should be turned off for this join
// because the join predicate is on the leading key column of one of the
// inner table's access paths.
//
// newGrby    : the GroupBy produced by the pullUpGroupBy transformation;
//              only its requiresMoveUp() flag is consulted here.
// normWARef  : normalizer work area; only the correlated subquery count
//              is read.
//
// Returns TRUE if the heuristic applies (the caller should not unnest),
// FALSE otherwise. When TRUE is returned, eliminateFilterChild() has been
// called on child(1).
NABoolean Join::applyInnerKeyedAccessHeuristic(const GroupByAgg* newGrby,
                                               NormWA & normWARef)
{
  RelExpr *oldGBgrandchild;

  // note that the child of oldGB is actually a Filter node, here
  // oldGBgrandchild is the child of oldGB before the Filter was added.
  if (child(1)->child(0)->getOperatorType() == REL_FILTER)
    oldGBgrandchild = child(1)->child(0)->child(0)->castToRelExpr();
  else
    oldGBgrandchild = child(1)->child(0)->castToRelExpr();

  // Apply inner table keyed scan heuristic. This heuristic turns off subquery
  // unnesting for this tsj if the join predicate is on a key column of the
  // inner table. The heuristic applies only if the key column is the leading
  // key column of a base table or an index. No consideration is made for the
  // selectivity of the index. This heuristic applies only if
  // 1. comp_bool_168 is OFF
  // 2. Inner side of tsj is a scan (not another subquery)
  // 3. There is only one level of nesting or this is tree subquery
  // 4. The number of tables below this join is LEQ COMP_INT_46
  //    (default value is 10)
  // If there are multiple levels of nesting the benefit of this heuristic is
  // doubtful as unnesting the lowest level will allow higher levels to be
  // unnested.
  if ((CmpCommon::getDefault(COMP_BOOL_168) == DF_OFF) &&
      (oldGBgrandchild->getOperatorType() == REL_SCAN) &&
      ((normWARef.getCorrelatedSubqCount() == 1) ||
       (NOT (((GroupByAgg*)newGrby)->requiresMoveUp()))) &&
      (getGroupAttr()->getNumJoinedTables() <=
       ActiveSchemaDB()->getDefaults().getAsLong(COMP_INT_46)))
    {
      RelExpr *oldGB = child(1)->castToRelExpr();
      const TableDesc * tableDesc =
        ((Scan *)oldGBgrandchild)->getTableDesc();
      const LIST(IndexDesc *) & ixlist = tableDesc->getIndexes();

      // Predicates of oldGB's child (the Filter node when one is present).
      ValueIdSet preds = oldGB->child(0)->castToRelExpr()->selectionPred();

      // Collect the VEG of the leading key column of every access path.
      ValueIdSet leadingKeyCols;
      for (CollIndex j = 0; j < ixlist.entries(); j++)
        {
          // An access path without any key column has no leading key
          // column; skip it instead of indexing element 0 of an empty list.
          if (ixlist[j]->getOrderOfKeyValues().entries() == 0)
            continue;

          // get only the leading key column from every access path.
          ValueId vid = ixlist[j]->getOrderOfKeyValues()[0];
          ItemExpr *colIE = vid.getItemExpr();
          if (colIE->getOperatorType() == ITM_VEG_REFERENCE)
            {
              // get the valueid of the VEG
              leadingKeyCols +=
                ((VEGReference *)colIE)->getVEG()->getValueId();
            }
        }

      // The heuristic fires on the first VEG predicate whose VEG is the
      // VEG of some access path's leading key column.
      for (ValueId x = preds.init();
           preds.next(x);
           preds.advance(x))
        {
          ItemExpr *ie = x.getItemExpr();
          if (ie->getOperatorType() == ITM_VEG_PREDICATE)
            {
              ValueId id = ((VEGPredicate *)ie)->getVEG()->getValueId();
              if (leadingKeyCols.contains(id))
                {
                  // Not unnesting this subquery: remove the Filter child
                  // of child(1) before returning.
                  child(1)->eliminateFilterChild();
                  if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
                    *CmpCommon::diags() << DgSqlCode(2997)
                      << DgString1("Subquery was not unnested. Reason: Join predicate is on a leading key column of inner table.");
                  return TRUE;
                }
            }
        }
    }

  return FALSE;
} // Join::applyInnerKeyedAccessHeuristic()
// -----------------------------------------------------------------------
// Join::semanticQueryOptimizeNode(). This method is the entry point for
// semantic query optimization (SQO). It will attempt the following types
// of optimization:
//
// a) join elimination
// b) transform semi joins to inner joins
// c) subqueries unnesting
//
// For the time being, only one of these transformations happens on a
// single query. Phase 3 may look at allowing multiple transformation
// on the same query.
//
// The prework for checking if a particular transformation is possible
// occurred in the transformer, where we set flags to indicate what kind of
// transformation a particular query is a candidate for. The other thing that
// may happen in the transformer, if we decide a query is a candidate for SQO,
// is that we may create a filter node to hold predicates with outer references.
// One of the main functions of the filter is to prevent pushdown of predicates
// with outer references.
//
// JOIN ELIMINATION:
// For join elimination we apply the following rules:
// 1) If predicates have been marked for removal
// Join {selection_pred: p1,p2,p3,...pn} --> Join {selection_pred: p3,...pn}
// where p1 and p2 are equi join predicates that are known to be true due
// to a foreign_key-unique_key relationship
//
// 2) If the children of the join are marked for removal
//
// parent
// | parent
// Join |
// / \ ------> X
// X Y
//
// where the node Y has been marked for elimination by the synthLogPhase.
//
// 3) If its a left join and has been markedForElimination by the normalize
// phase then
//
// parent
// | parent
// LeftJoin |
// / \ ------> X
// X Y
//
// SEMI-JOIN TRANSFORMATION
//
// a) If the right child is unique in the joining column and the semi join
// can be simply translated into a join.
//
// An example query is
//
// select t1.a
// from t1
// where t1.b in (select t2.a
// from t2) ;
// Here t2.a is a unique key of table t2.
//
// The following transformation is made
// Semi Join {pred : t1.b = t2.a} ------> Join {pred : t1.b = t2.a}
//
// b) If the right child is not unique in the joining column then
// we transform the semijoin into an inner join followed by a groupby
// as the join's right child. This transformation is enabled by default
// only if the right side is an IN list, otherwise a CQD has to be used.
//
// groupby (X.key)
// SemiJoin |
// / \ ------> Join
// X Y / \
// X Y
//
// SUBQUERY UNNESTING
// The subquery unnesting consist of two main transformations:
// pullUpGroupBy and moveUpGroupBy transformation
// which are based on Dayal and Muralikrishna's algorithm (see below).
//
// a) pullUpGroupBy transformation:
//
// For a single level subquery this is the only transformation required for
// subquery unnesting.
//
// TSJ GroupBy
// / \ |
// X ScalarAgg --> Join (pred)
// | / \
// Filter (pred) X Y
// |
// Y
//
// For a multilevel query this may happen several times.
// Under certain circumstances, in a multilevel subquery, we may also need
// to apply the moveupGroupBy transformation.
//
// b) moveUpGroupBy transformation:
//
// When the pullUpGroupBy transformation has to be applied more than once
// on a query tree (for multi-level subqueries), then it is possible that
// that a groupBy below still contains outer references. For example with
// a two level query, this is what the tree will look like after applying
// the pullUpGroupBy transformation twice:
//
// TSJ2 GroupBy2
// / \ pullUpGroupBy |
// X ScalarAgg2 transformation Join2
// | (2 times) / \
// Filter2 ----------> X GroupBy1
// | \
// TSJ1 Join1
// / \ / \
// Y ScalarAgg1 Y Z
// \
// Filter1
// \
// Z
//
// If the selection pred. of GroupBy1 and/or Join1 contain outer
// references after the transformation, those predicates will have to
// be pulled up so that Join2 does not have to be a TSJ. See the comment
// in Join::moveUpGroupByTransformation() for how the right side
// of the picture above gets transformed further.
//
// One additional complication occurs when we need to convert any of the
// TSJs into a LeftJoin. This conversion occurs during either or both
// the pullUpGroupBy or moveUpGroupBy transformation. If we require a LeftJoin
// we manipulate the predicates and null-instantiated outputs of the LeftJoin
// that is from the right subtree in order to preserve correctness. Refer
// to the infamous count bug!
// For more details, please refer to
// M. Muralikrishna, "Improved Unnesting Algorithms for Join Aggregate SQL Queries",
// Proc. VLDB Conf., pp. 91-102 (1992)
// -----------------------------------------------------------------------
RelExpr * Join::semanticQueryOptimizeNode(NormWA & normWARef)
{
// ---------------------------------------------------------------------
// SemanticQueryOptimize each child.
// ---------------------------------------------------------------------
// ---------------------------------------------------------------------
// SemanticQueryOptimize the left and right subtrees. Store pointers to
// the roots of the subtrees after SQO.
// ---------------------------------------------------------------------
if (isFullOuterJoin())
normWARef.locateAndSetVEGRegion(this, 0 /* first child */);
child(0) = child(0)->semanticQueryOptimizeNode(normWARef);
if(isFullOuterJoin())
normWARef.restoreOriginalVEGRegion();
if (ownsVEGRegions())
{
// -------------------------------------------------------------
// Locate and set the VEGRegion for the right subtree.
// -------------------------------------------------------------
if (isFullOuterJoin())
normWARef.locateAndSetVEGRegion(this, 1 /* second child */);
else
normWARef.locateAndSetVEGRegion(this);
child(1) = child(1)->semanticQueryOptimizeNode(normWARef);
normWARef.restoreOriginalVEGRegion();
}
else
{
child(1) = child(1)->semanticQueryOptimizeNode(normWARef);
}
// In the bottom up phase of the SQO tree walk
// check if there are
// a) joins to be eliminated or
// b) semi joins to transform to inner joins or
// c) any subqueries to unnest
// a) Join Elimination
/*---------------------------------------------------------------------------------------*/
RelExpr* reducedExpr = eliminateRedundantJoin(normWARef);
if (reducedExpr != this)
return reducedExpr;
// b) SemiJoin Transformation
/*---------------------------------------------------------------------------------------*/
if (candidateForSemiJoinTransform()) // we have a semi join that could be transformed to
// an inner join + group by.
{
reducedExpr = transformSemiJoin(normWARef);
if (reducedExpr != this)
return reducedExpr;
}
// c) Subquery Unnesting
/*---------------------------------------------------------------------------------------*/
if (candidateForSubqueryUnnest())
{
// SQO phase is called in a loop for join elimination
// we do not want to attempt unnesting on the same node twice.
setCandidateForSubqueryUnnest(FALSE);
// Outer references are kept in a filter node, if there are no outer
// references then unnesting is not needed.
// For subqueries that are not correlated the Filter node will be absent
// as the method createAFilterParentIfNecessary() would not have created
// a Filter node at this point in the query tree. Therefore non-correlated
// subqueries will not enter the loop.
// If comp_bool_221 is on we will unnest even if there is no filter node.
if ((CmpCommon::getDefault(COMP_BOOL_221) == DF_OFF) &&
((child(1)->getArity() != 1) ||
!(child(1)->castToRelExpr()->hasFilterChild())))
{
if (CmpCommon::getDefault(SUBQUERY_UNNESTING) == DF_DEBUG)
*CmpCommon::diags() << DgSqlCode(2997)
<< DgString1("Subquery was not unnested. Reason: No Correlation found");
return this ; // do nothing, no correlated subquery
}
// Increment the subquery id counter in SqoWA so that we can
// distinguish between things we change for this subquery and another.
normWARef.getSqoWA()->incrementSubQId();
// Main body of subquery unnesting. The PullUpGroupBy transformation and
// the MoveUpGroupBy transformation are applied here.
// The PullUpGroupByTransformation
GroupByAgg* newGrby = pullUpGroupByTransformation(normWARef);
if (newGrby == NULL)
return this;
// If inner table is accessed on a leading column of any access path
// then do not unnest this subquery. We perform this check after
// the pullUpGroupByTransformation() since we want to cover the
// the case of a Tree Query and we need to know if the newGrby requires
// a moveUpGroupBy transformation.
if(applyInnerKeyedAccessHeuristic((const GroupByAgg*)newGrby,normWARef))
return this;
MapValueIds* newMap = NULL ;
if (candidateForSubqueryLeftJoinConversion())
{
RelExpr* result = newGrby->nullPreservingTransformation(
(GroupByAgg*)child(1)->castToRelExpr(),
normWARef);
if (result == NULL)
{
normWARef.getSqoWA()->undoChanges(normWARef);
return this;
}
if (result->getOperatorType() == REL_MAP_VALUEIDS)
newMap = (MapValueIds*) result;
}
// Apply MoveUp GroupBy transformation. Relevant only for subqueries with
// two or more levels of nesting. If moveUpGroupBy is not needed
// movedUpGroupByTail will be set to newGrby.
RelExpr* gbChild = newGrby->child(0)->castToRelExpr();
Join * newJoin = NULL;
GroupByAgg * newJoinParent = newGrby;
if (gbChild->getOperatorType() == REL_GROUPBY)
{
newJoin = (Join*) gbChild->child(0)->castToRelExpr();
newJoinParent = (GroupByAgg*) gbChild;
}
else
newJoin = (Join*) gbChild;
GroupByAgg* movedUpGrbyTail =
newJoin->moveUpGroupByTransformation(newJoinParent, normWARef);
NABoolean hasNoErrors;
if (movedUpGrbyTail != NULL)
{
hasNoErrors = movedUpGrbyTail->subqueryUnnestFinalize(
newGrby->groupExpr(),
normWARef);
}
if ((movedUpGrbyTail == NULL) || (NOT hasNoErrors))
{
normWARef.getSqoWA()->undoChanges(normWARef);
child(1)->eliminateFilterChild();
return this ;
}
// this subquery level has been successfully unnested. Left linearize the
// join backbone. Comp_int_11 can be used to not left linearize as we
// go further up the tree. This is not advised as the analyzer expects
// the tree to be left linear in many situations. This control is kept
// as it provides the possibility to see some interesting plans.
if ((ActiveSchemaDB()->getDefaults().getAsLong(COMP_INT_11) < 0) ||
(newJoin->child(1)->getGroupAttr()->getNumJoinedTables() <=
ActiveSchemaDB()->getDefaults().getAsLong(COMP_INT_11)))
{
newJoin = newJoin->leftLinearizeJoinTree(normWARef,
UNNESTING); // Unnesting
movedUpGrbyTail->child(0) = newJoin ;
}
//synthesize logical props for the new nodes.
if (newMap == NULL)
{
newGrby->synthLogProp(&normWARef);
return newGrby ;
}
else
{
newMap->synthLogProp(&normWARef);
return newMap ;
}
}
else
{
// this subquery was not unnested, but we could have other transformations
// that would render the tree no longer left linearized
if ((ActiveSchemaDB()->getDefaults().getAsLong(COMP_INT_11) < 0) ||
(child(1)->getGroupAttr()->getNumJoinedTables() <=
ActiveSchemaDB()->getDefaults().getAsLong(COMP_INT_11)))
{
return leftLinearizeJoinTree(normWARef, SEMI_JOIN_TO_INNER_JOIN); //
}
}
/*---------------------------------------------------------------------------------------*/
return this;
} // Join::semanticQueryOptimizeNode()
NABoolean Join::prepareMeForCSESharing(
     const ValueIdSet &outputsToAdd,
     const ValueIdSet &predicatesToRemove,
     const ValueIdSet &commonPredicatesToAdd,
     const ValueIdSet &inputsToRemove,
     ValueIdSet &valuesForVEGRewrite,
     ValueIdSet &keyColumns,
     CSEInfo *info)
{
  // Node-local preparation of a Join for sharing as a common
  // subexpression. Returns FALSE for join flavors that are rejected
  // here, TRUE otherwise. None of the parameters are read by this
  // method.

  // TSJs for write, undo and merge as well as joins used for the
  // Trafodion load prep phase are rejected.
  const NABoolean isExcludedJoin =
    isTSJForWrite() ||
    isTSJForUndo()  ||
    isTSJForMerge() ||
    getIsForTrafLoadPrep();

  if (NOT isExcludedJoin)
    {
      // The caller of this method has already added
      // "commonPredicatesToAdd" to predicates_ (the generic selection
      // predicates stored in the RelExpr), which works for inner and
      // non-inner joins alike. All that remains is to recompute the
      // equi-join predicates.
      findEquiJoinPredicates();
      return TRUE;
    }

  return FALSE;
}
// ***********************************************************************
// $$$$ Union
// member functions for class Union
// ***********************************************************************
// -----------------------------------------------------------------------
// Union::transformNode()
// -----------------------------------------------------------------------
void Union::transformNode(NormWA & normWARef,
                          ExprGroupId & locationOfPointerToMe)
{
  // Transforms a Union: each child is transformed inside its own
  // IMPORT_ONLY VEGRegion, common predicates are pulled up and the
  // selection predicates of the Union itself are transformed.
  CMPASSERT( this == locationOfPointerToMe );

  // A node is transformed at most once.
  if (nodeIsTransformed())
    return;
  markAsTransformed();

  //++Triggers -
  if (getBlockedUnion())
    normWARef.setInBlockedUnionCount();

  // ---------------------------------------------------------------------
  // Compartmentalize the VEGRegions between the left and the right
  // child so that their VEGs get installed in different VEGRegions,
  // where they rightfully belong. It prevents unenforceable "="
  // relationships from being deduced transitively.
  // The VEGRegion for each child of the union is only allowed to
  // import outer references. It cannot "export" any "=" relationships.
  // ---------------------------------------------------------------------
  for (Lng32 side = 0; side < 2; side++)
    {
      // Allocate a new VEGRegion for this child within the scope of my
      // own VEGRegion.
      normWARef.allocateAndSetVEGRegion(IMPORT_ONLY, this, side);

      // Make my input values available to the child.
      child(side)->getGroupAttr()->addCharacteristicInputs
        (getGroupAttr()->getCharacteristicInputs());

      // Transform the child.
      child(side)->transformNode(normWARef, child(side));

      // Return to my own VEGRegion.
      normWARef.restoreOriginalVEGRegion();
    }

  // colMapExprList is not transformed here because the source and the
  // target expressions will be transformed by their own operators.

  // Pull up the predicates and recompute the required inputs
  // of whoever my children are now.
  pullUpPreds();

  // Transform the selection predicates.
  transformSelectPred(normWARef, locationOfPointerToMe);

  // A temporary Union will be removed during optimization. If it has
  // no characteristic outputs of its own, send the outputs of its left
  // child to the parent, which is what will happen when this node is
  // later removed.
  if ((getIsTemporary()) &&
      (getGroupAttr()->getCharacteristicOutputs().entries() == 0))
    {
      GroupAttributes * leftChildGA = child(0).getPtr()->getGroupAttr();
      getGroupAttr()->setCharacteristicOutputs(
           leftChildGA->getCharacteristicOutputs());
    }

  //++Triggers -
  if (getBlockedUnion())
    normWARef.restoreInBlockedUnionCount();

} // Union::transformNode()
// -----------------------------------------------------------------------
// Union::pullUpPreds()
// -----------------------------------------------------------------------
void Union::pullUpPreds()
{
// For a predicate to be pulled up from the children it has to
// be part of both children. The only predicates we can detect
// as being part of both child are those that only use correlated
// references. and are identical (i.e. use the same value id's
// for expressions under the tree).
//
// Other predicates will require a more sophisticated pattern matching.
//
ValueIdSet commonPredicates(child(0)->getSelectionPred());
commonPredicates.intersectSet(child(1)->getSelectionPred());
selectionPred() += commonPredicates;
child(0)->selectionPred() -= commonPredicates;
child(1)->selectionPred() -= commonPredicates;
child(0)->recomputeOuterReferences();
child(1)->recomputeOuterReferences();
} // Union::pullUpPreds()
// -----------------------------------------------------------------------
// Union::recomputeOuterReferences()
// -----------------------------------------------------------------------
void Union::recomputeOuterReferences()
{
// ---------------------------------------------------------------------
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
// ---------------------------------------------------------------------
if (NOT getGroupAttr()->getCharacteristicInputs().isEmpty())
{
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
// Weed out those expressions not needed by my selection predicates
// and by my left and right children as input values.
ValueIdSet exprSet = getSelectionPred();
exprSet += child(0)->getGroupAttr()->getCharacteristicInputs();
exprSet += child(1)->getGroupAttr()->getCharacteristicInputs();
// Add conditional expression for conditional union.
exprSet.insertList(condExpr());
exprSet.insertList(alternateRightChildOrderExpr()); //++MV
// Add the output expressions of each child that are cached in the
// UnionMap. If a child references an external input value in its
// output, i.e., the select list of the SELECT, but that external
// input is not referenced elsewhere in the query, then no record
// of such a reference exists but in the UnionMap. This is so
// because the RelRoot for the subselect under the Union, which
// contained the select list is eliminated by transformNode().
// When the RelRoot is eliminated, its characteristic inputs
// are added to its children. However, Union::pullUpPreds()
// calls recomputeOuterReferences on each child. The latter call
// wipes out all such external input values that are not referenced
// elsewhere in the query. In order to ensure that such external
// input values flow down to the Union, add them to exprSet.
exprSet.insertList(getLeftMap().getBottomValues());
exprSet.insertList(getRightMap().getBottomValues());
exprSet.weedOutUnreferenced(outerRefs);
getGroupAttr()->setCharacteristicInputs(outerRefs);
}
} // Union::recomputeOuterReferences()
// -----------------------------------------------------------------------
// UnionMap::normalizeSpecificChild()
// -----------------------------------------------------------------------
void UnionMap::normalizeSpecificChild(NormWA & normWARef, Lng32 childIndex)
{
  // Normalize the maps constructed for the union, replacing
  // valueIds with VegRef's where appropriate.

  // First the ValueIdUnion entries of the column map table.
  for (CollIndex col = 0; col < colMapTable_.entries(); col++)
    {
      ValueIdUnion * vidUnion =
        (ValueIdUnion *)(colMapTable_[col].getItemExpr());
      CMPASSERT(vidUnion->getOperatorType() == ITM_VALUEIDUNION);
      vidUnion->normalizeSpecificChild(normWARef, childIndex);
    }

  // Then the column map of the requested child.
  if (childIndex == 0)
    {
      leftColMap_.normalizeNode(normWARef);
    }
  else if (childIndex == 1)
    {
      rightColMap_.normalizeNode(normWARef);
    }
  else
    {
      CMPASSERT(childIndex < 2);
    }

} // UnionMap::normalizeSpecificChild()
// -----------------------------------------------------------------------
// Union::rewriteNode()
// -----------------------------------------------------------------------
void Union::rewriteNode(NormWA & normWARef)
{
  // Rewrites the expressions of a Union and of its children. The
  // expressions contributed by each child are normalized while the
  // VEGRegion allocated for that child is the current one.

  // Locate the VEGRegion that I had allocated for my left child.
  normWARef.locateAndSetVEGRegion(this,0);
  // Rewrite the left subtree.
  child(0)->rewriteNode(normWARef);
  // Normalize the union map entries contributed by child(0).
  getUnionMap()->normalizeSpecificChild(normWARef, 0);
  normWARef.restoreOriginalVEGRegion();

  // Locate the VEGRegion that I had allocated for my right child.
  normWARef.locateAndSetVEGRegion(this,1);
  // Rewrite the right subtree.
  child(1)->rewriteNode(normWARef);
  // Normalize the union map entries contributed by child(1).
  getUnionMap()->normalizeSpecificChild(normWARef, 1);
  // ++MV
  // The alternate right child order expression should be normalized in
  // the region of the right child.
  alternateRightChildOrderExpr().normalizeNode(normWARef);
  // --MV
  normWARef.restoreOriginalVEGRegion();

  // Normalize the predicates and the conditional expression in my own
  // VEGRegion.
  selectionPred().normalizeNode(normWARef);
  condExpr().normalizeNode(normWARef);

  // For embedded statements, when a blocked union is introduced by
  // triggers that will later be removed, use the VEGRegion of the left
  // child while normalizing the inputs and outputs.
  const NABoolean useLeftChildRegion = getIsTemporary();
  if (useLeftChildRegion)
    normWARef.locateAndSetVEGRegion(this,0);

  // Rewrite my own Group Attributes.
  getGroupAttr()->normalizeInputsAndOutputs(normWARef);

  // Every locateAndSetVEGRegion() must be matched by a restore;
  // otherwise the left child's region would remain the current region
  // after this method returns.
  if (useLeftChildRegion)
    normWARef.restoreOriginalVEGRegion();

} // Union::rewriteNode()
// -----------------------------------------------------------------------
// Union::normalizeNode()
// -----------------------------------------------------------------------
RelExpr * Union::normalizeNode(NormWA & normWARef)
{
  // Normalizes a Union and both of its subtrees. Always returns this
  // node.

  // A node is normalized at most once.
  if (nodeIsNormalized())
    return this;
  markAsNormalized();

  // ---------------------------------------------------------------------
  // Check which expressions can be evaluated by the children of the
  // union. Modify the Group Attributes of those children who inherit
  // some of these expressions.
  // ---------------------------------------------------------------------
  pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
                      getGroupAttr()->getCharacteristicInputs(),
                      selectionPred()
                     );

  // ---------------------------------------------------------------------
  // Normalize the left (0) and the right (1) subtree, each one in the
  // VEGRegion that I had allocated for it. Store the pointers to the
  // roots of the subtrees after normalization.
  // ---------------------------------------------------------------------
  for (Lng32 side = 0; side < 2; side++)
    {
      normWARef.locateAndSetVEGRegion(this, side);
      child(side) = child(side)->normalizeNode(normWARef);
      normWARef.restoreOriginalVEGRegion();
    }

  fixEssentialCharacteristicOutputs();

  normWARef.setExtraHubVertex(this);

  return this; // return a -> to self
} // Union::normalizeNode()
// -----------------------------------------------------------------------
// Union::semanticQueryOptimizeNode()
// This instance of the SQO virtual method is the same as the base class
// implementation except that it also keeps track of which
// VEGRegion we are currently in.
// -----------------------------------------------------------------------
RelExpr * Union::semanticQueryOptimizeNode(NormWA & normWARef)
{
  // ---------------------------------------------------------------------
  // Run SQO on the left (0) and the right (1) subtree, each one in the
  // VEGRegion that I had allocated for it. Store the pointers to the
  // roots of the subtrees after SQO. Always returns this node.
  // ---------------------------------------------------------------------
  for (Lng32 side = 0; side < 2; side++)
    {
      normWARef.locateAndSetVEGRegion(this, side);
      child(side) = child(side)->semanticQueryOptimizeNode(normWARef);
      normWARef.restoreOriginalVEGRegion();
    }

  return this;
} // Union::semanticQueryOptimizeNode()
NABoolean Union::prepareTreeForCSESharing(
     const ValueIdSet &outputsToAdd,
     const ValueIdSet &predicatesToRemove,
     const ValueIdSet &commonPredicatesToAdd,
     const ValueIdSet &inputsToRemove,
     ValueIdSet &valuesForVEGRewrite,
     ValueIdSet &keyColumns,
     CSEInfo *info)
{
  // Prepares a Union and, recursively, both of its children for
  // sharing as a common subexpression. The value id sets passed in are
  // rewritten through the left/right map of the union before they are
  // handed to the corresponding child. Key columns reported by the
  // children are mapped back up and added to keyColumns.
  //
  // Returns TRUE on success, FALSE if the tree could not be prepared;
  // in the latter case a diagnostic is emitted through consumer 0 of
  // "info" for the failures detected in this method.
  NABoolean result = TRUE;

  // we only support UNION nodes without local predicates, which
  // should be all cases, since there should not be any predicates on
  // a UNION
  if (getSelectionPred().entries() > 0)
    {
      info->getConsumer(0)->emitCSEDiagnostics(
           "Selection predicates on union node not supported");
      return FALSE;
    }

  // recursively call this for the children
  for (CollIndex i=0; i<2 && result; i++)
    {
      ValueIdSet locOutputsToAdd(outputsToAdd);
      ValueIdSet childOutputsToAdd;
      ValueIdSet childPredsToRemove;
      ValueIdSet childPredsToAdd;
      ValueIdMap *map = (i==0 ? &getLeftMap() : &getRightMap());
      ValueIdSet availableValues(map->getTopValues());
      ValueIdSet dummyValuesForVEGRewrite;
      ValueIdSet mappedKeyColumns;
      ValueIdSet childKeyColumns;

      // if there are outputs to add, we can only do that for
      // outputs that already exist in the ValueIdMap
      availableValues += getGroupAttr()->getCharacteristicInputs();
      if (locOutputsToAdd.removeUnCoveredExprs(availableValues))
        {
          info->getConsumer(0)->emitCSEDiagnostics(
               "Not able to add output values unknown to union operator");
          result = FALSE;
          // Stop right here. Falling through would overwrite the
          // failure with the result of the recursive call below and
          // would also modify the child although the union cannot be
          // shared.
          break;
        }

      map->rewriteValueIdSetDown(outputsToAdd, childOutputsToAdd);
      map->rewriteValueIdSetDown(predicatesToRemove, childPredsToRemove);
      map->rewriteValueIdSetDown(commonPredicatesToAdd, childPredsToAdd);

      result = child(i)->prepareTreeForCSESharing(
           childOutputsToAdd,
           childPredsToRemove,
           childPredsToAdd,
           inputsToRemove,
           dummyValuesForVEGRewrite,
           childKeyColumns,
           info);

      map->mapValueIdSetUp(mappedKeyColumns, childKeyColumns);
      // include only those that actually got mapped
      mappedKeyColumns -= childKeyColumns;
      keyColumns += mappedKeyColumns;
    }

  if (result)
    {
      NABoolean dummy;
      CollIndex nu = unionMap_->leftColMap_.getBottomValues().entries();

      getGroupAttr()->addCharacteristicOutputs(outputsToAdd);
      getGroupAttr()->removeCharacteristicInputs(inputsToRemove);

      // add columns that are a constant in at least one of the
      // UNION's children to the key columns. Such columns can be used
      // to eliminate entire legs of the union and therefore act like
      // key or partition key columns.
      for (CollIndex u=0; u<nu; u++)
        {
          if (unionMap_->leftColMap_.getBottomValues()[u].getItemExpr()->
                castToConstValue(dummy) ||
              unionMap_->rightColMap_.getBottomValues()[u].getItemExpr()->
                castToConstValue(dummy))
            keyColumns += unionMap_->colMapTable_[u];
        }
    }

  // there is no need to call prepareMeForCSESharing() here

  return result;
}
// ***********************************************************************
// $$$$ GroupByAgg
// member functions for class GroupByAgg
// ***********************************************************************
void GroupByAgg::transformNode(NormWA & normWARef,
                               ExprGroupId &locationOfPointerToMe)
{
  // Transforms a GroupByAgg node: the child, the grouping expressions
  // and the aggregate expressions are transformed, predicates are
  // pulled up into the having clause and finally the having clause
  // itself is transformed.
  CMPASSERT( this == locationOfPointerToMe );

  // A node is transformed at most once.
  if (nodeIsTransformed())
    return;
  markAsTransformed();

  // ---------------------------------------------------------------------
  // Each scalar aggregate allocates a VEGRegion for "=" predicates that
  // do not reject null values.
  // It can only import a VEG from another VEGRegion in which an "outer
  // reference" is involved.
  //   select empname
  //   from employee
  //   where not exists
  //     (select branchnum
  //      from branch
  //      where ( employee.branchnum = branch.branchnum)
  //        and (branchnum = 1)
  //      group by branchnum
  //     );
  // It is legal to deduce that employee.branchnum = 1 within the subquery
  // but not in the main query.
  // ---------------------------------------------------------------------
  NABoolean allocatedOwnRegion = FALSE;

  if (groupExpr().isEmpty())
    {
      if (NOT aggregateExpr().isEmpty())
        {
          if (NOT containsNullRejectingPredicates())
            {
              allocatedOwnRegion = TRUE;
              normWARef.allocateAndSetVEGRegion(IMPORT_ONLY,this);
            }
        }
      else
        {
          // -------------------------------------------------------------
          // Fix to "BR0198" (Genesis 10-000303-8476).
          // If there's no grouping expression and no aggregation
          // expression, then aggregate over a constant, i.e.,
          // make one single group (zero or one "row") of the entire
          // table.
          // See Ansi 7.8 SR 1 + GR 1 (HAVING clause).
          // See /regress/fullstack/test002 cases.
          // By adding a constant to the grouping expression we are
          // treating this as a nonScalar grby.
          // -------------------------------------------------------------
          ItemExpr *groupingConst = new(normWARef.wHeap()) ConstValue(0);
          groupingConst->synthTypeAndValueId(TRUE);
          groupExpr() += groupingConst->getValueId();
        }
    }

  // ---------------------------------------------------------------------
  // Transform the child.
  // A new semiJoin may now be my direct descendant and my original
  // child a descendant of it. In either case my child has now been
  // transformed.
  // ---------------------------------------------------------------------
  child(0)->transformNode(normWARef, child(0));

  // ---------------------------------------------------------------------
  // A Group By clause can only contain column references.
  // An aggregate function cannot contain a subquery according to SQL2.
  // However, the group by list and aggregate functions could be columns
  // from a derived table and may therefore contain subselects and all
  // sorts of nasty things. So we allow anything here.
  //
  // Subqueries in the group by list and aggregate functions should
  // introduce semijoins below the groupby and subqueries in the
  // having clause above the groupby.
  //
  // Order of work should be
  //   process group by
  //   process aggregate expressions
  //   pull up predicates
  //   process having clause
  // ---------------------------------------------------------------------
  if (groupExpr().transformNode(normWARef, child(0),
                                getGroupAttr()->getCharacteristicInputs(),
                                FALSE /* Move predicates */ ) )
    {
      // The group by list contained subqueries that had not been
      // processed before. Transform the new tree that has become our
      // child.
      child(0)->transformNode(normWARef, child(0));
    }

  if (aggregateExpr().transformNode(normWARef, child(0),
                                    getGroupAttr()->getCharacteristicInputs(),
                                    FALSE /* Move predicates */ ) )
    {
      // An aggregate was on a subquery that had not been processed
      // before. Transform the new tree that has become our child.
      child(0)->transformNode(normWARef, child(0));
    }

  // Pull up the predicates into my having clause and recompute the
  // required inputs of whoever my children are now.
  pullUpPreds();

  // Leave the VEGRegion allocated above, if any.
  if (allocatedOwnRegion)
    normWARef.restoreOriginalVEGRegion();

  // Transform the selection predicates (the having clause).
  normWARef.setInHavingClause(TRUE) ;
  transformSelectPred(normWARef, locationOfPointerToMe);
  normWARef.setInHavingClause(FALSE) ;

} // GroupByAgg::transformNode()
// -----------------------------------------------------------------------
// GroupByAgg::pullUpPreds()
// -----------------------------------------------------------------------
void GroupByAgg::pullUpPreds()
{
// ---------------------------------------------------------------------
// Pull up predicates from the child.
// move them to my having clause
// ---------------------------------------------------------------------
// Make inputs available to child
child(0)->getGroupAttr()->addCharacteristicInputs(getGroupAttr()->getCharacteristicInputs());
// Parts of the rules for this virtual method is that recomputOuterRefs()
// should be called on the child even if no predicates are pulled up
// from it.
child(0)->recomputeOuterReferences();
// If this is a scalar groupby that can produce NULL values then predicates
// cannot be moved up.
if (groupExpr().isEmpty() && NOT containsNullRejectingPredicates())
return;
if (child(0)->getSelectionPred().isEmpty())
return;
// Only predicates that reference group by columns or
// other input values can be pulled up.
// We are going to prime group attributes ahead of time here so that
// we can call coverTest() from here.
ValueIdSet saveExternalInputs = getGroupAttr()->getCharacteristicInputs();
primeGroupAttributes();
ValueIdSet predicatesToPullUp, boringSet, predicatesThatStay;
getGroupAttr()->coverTest(child(0)->selectionPred(),
saveExternalInputs, // Like passing empty
predicatesToPullUp,
boringSet,
&predicatesThatStay);
if (NOT predicatesToPullUp.isEmpty())
{
selectionPred() += predicatesToPullUp;
child(0)->selectionPred() -= predicatesToPullUp;
child(0)->recomputeOuterReferences();
}
getGroupAttr()->setCharacteristicInputs(saveExternalInputs);
} // GroupByAgg::pullUpPreds()
// -----------------------------------------------------------------------
// GroupByAgg::recomputeOuterReferences()
// -----------------------------------------------------------------------
void GroupByAgg::recomputeOuterReferences()
{
// ---------------------------------------------------------------------
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
// ---------------------------------------------------------------------
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
ValueIdSet allMyExpr(getSelectionPred());
allMyExpr += groupExpr();
allMyExpr += aggregateExpr();
allMyExpr.weedOutUnreferenced(outerRefs);
outerRefs += child(0).getPtr()->getGroupAttr()->getCharacteristicInputs();
getGroupAttr()->setCharacteristicInputs(outerRefs);
} // GroupByAgg::recomputeOuterReferences()
// -----------------------------------------------------------------------
// GroupByAgg::rewriteNode()
// -----------------------------------------------------------------------
void GroupByAgg::rewriteNode(NormWA & normWARef)
{
  // Rewrites the expressions of a GroupByAgg node and of its child.

  // ---------------------------------------------------------------------
  // Each scalar aggregate allocates a VEGRegion for "=" predicates that
  // do not reject null values.
  // It can only import a VEG from another VEGRegion in which an "outer
  // reference" is involved.
  //   select empname
  //   from employee
  //   where not exists
  //     (select branchnum
  //      from branch
  //      where ( employee.branchnum = branch.branchnum)
  //        and (branchnum = 1)
  //      group by branchnum
  //     );
  // It is legal to deduce that employee.branchnum = 1 within the subquery
  // but not in the main query.
  // ---------------------------------------------------------------------
  const NABoolean usesOwnRegion =
    groupExpr().isEmpty() && (NOT containsNullRejectingPredicates());
  if (usesOwnRegion)
    normWARef.locateAndSetVEGRegion(this);

  // ---------------------------------------------------------------------
  // Rewrite the expressions of the child.
  // ---------------------------------------------------------------------
  child(0)->rewriteNode(normWARef);

  // ---------------------------------------------------------------------
  // Rewrite the grouping expressions, the rollup grouping expressions,
  // the extra order expressions and the aggregate expressions, in that
  // order.
  // ---------------------------------------------------------------------
  groupExpr().normalizeNode(normWARef);
  rollupGroupExprList().normalizeNode(normWARef);
  normalizeExtraOrderExpr(normWARef);
  aggregateExpr().normalizeNode(normWARef);

  // 10-050616-8826 -BEGIN
  // If transformation has not happened then it is a possibility that
  // the "TYPE" of the ItemExpr can change, for example when outer
  // joins are transformed to inner joins. Re-synthesize the type of
  // every aggregate whose type differs from the type of its first
  // child.
  if (NOT aggregateExpr().isEmpty())
    {
      ValueIdSet aggrsToCheck = aggregateExpr();
      for (ValueId aggrId = aggrsToCheck.init();
           aggrsToCheck.next(aggrId);
           aggrsToCheck.advance(aggrId))
        {
          const NAType &aggrType = aggrId.getType();
          const NAType &operandType =
            aggrId.getItemExpr()->child(0).getValueId().getType();

          if (aggrType == operandType)
            continue;

          aggrId.getItemExpr()->synthTypeAndValueId(TRUE);
        }
    }
  // 10-050616-8826 -END

  // ---------------------------------------------------------------------
  // If we're enforcing an ITM_ONE_ROW on (x,y), then we can produce not
  // merely the ITM_ONE_ROW, but also x and y, so add them to our outputs.
  // For example, if the aggregate is, say,
  //   ITM_ONE_ROW(VEGRef_10(T.A,ixT.A), VEGRef_15(T.B,ixT.B))
  // { example query: select * from S where (select A,B from T) < (100,200) }
  // then add value ids 10 and 15 to our characteristic outputs.
  // ---------------------------------------------------------------------
  ValueIdSet extraOutputs;
  getPotentialOutputValues(extraOutputs);
  getGroupAttr()->addCharacteristicOutputs(extraOutputs);

  // ---------------------------------------------------------------------
  // Restore the VEGRegion of my parent.
  // ---------------------------------------------------------------------
  if (usesOwnRegion)
    normWARef.restoreOriginalVEGRegion();

  // ---------------------------------------------------------------------
  // Rewrite the expressions in the HAVING clause predicate.
  // ---------------------------------------------------------------------
  selectionPred().normalizeNode(normWARef);

  // ---------------------------------------------------------------------
  // Rewrite my own Group Attributes
  // ---------------------------------------------------------------------
  getGroupAttr()->normalizeInputsAndOutputs(normWARef);

} // GroupByAgg::rewriteNode()
// -----------------------------------------------------------------------
// GroupByAgg::normalizeNode()
// -----------------------------------------------------------------------
RelExpr * GroupByAgg::normalizeNode(NormWA & normWARef)
{
  // Normalizes a GroupByAgg node and its child. Always returns this
  // node.

  // A node is normalized at most once.
  if (nodeIsNormalized())
    return this;
  markAsNormalized();

  // ---------------------------------------------------------------------
  // If one of my HAVING preds is a truth-test that always evaluates to TRUE,
  // remove it; in particular, remove IS_NOT_UNKNOWN(IS_NOT_NULL(myAggrExpr))
  // (doubtless created by dissectOutSubqueries in NormItemExpr.cpp)
  // as redundant, the aggregation already being enforced by this GroupByAgg.
  // ---------------------------------------------------------------------
  DBGSETDBG( "TRANSFORM_DEBUG" )
  DBGIF(
    unp = "";
    unp += "sel:";
    selectionPred().unparse(unp);
    unp += "\nagg:";
    aggregateExpr().unparse(unp);
  )

  ItemExpr *bottomOfTest;
  ValueIdSet &agg = aggregateExpr();
  ValueIdSet &sel = selectionPred();
  for (ValueId svid = sel.init(); sel.next(svid); sel.advance(svid))
    {
      bottomOfTest = UnLogicMayBeAnEliminableTruthTest(svid.getItemExpr(),TRUE);

      // NOTE(review): this used to be a brace-less if/if/for/if ... else
      // chain. By the language rules the "else" belongs to the innermost
      // "if" inside the loop over the aggregates, so both branches run
      // per aggregate and both remove svid. The braces below make that
      // binding explicit without changing behavior. The debug messages
      // suggest that the "else" may have been meant for the
      // isAnAggregate() test instead - TODO confirm the intended
      // semantics before changing what gets eliminated.
      if (bottomOfTest && bottomOfTest->isAnAggregate())
        {
          for (ValueId avid = agg.init(); agg.next(avid); agg.advance(avid))
            {
              if (bottomOfTest == avid.getItemExpr())
                {
                  DBGIF(
                    cerr << unp << endl;
                    cerr << "Eliminating aggr "<< svid << endl;
                  )
                  sel.subtractElement(svid); // svid, not avid!
                }
              else
                {
                  DBGIF(
                    cerr << unp << endl;
                    cerr << "Eliminating having-pred " << svid << endl;
                  )
                  sel.subtractElement(svid);
                }
            }
        }
    }

  // ---------------------------------------------------------------------
  // Check which expressions can be evaluated by my child.
  // Modify the Group Attributes of those children who inherit some of
  // these expressions.
  // Check if any of the HAVING clause predicates can be pushed down
  // (only when a Group By list is given).
  // ---------------------------------------------------------------------
  // if this is a rollup groupby, then do not pushdown having pred to
  // child node. If pushdown is done, then it might incorrectly process rows that
  // are generated during rollup groupby processing.
  // For ex:
  //   insert into t values (1);
  //   select a from t group by rollup(a) having a is not null;
  // If 'having' pred is pushdown to scan node as a where pred,
  // then SortGroupBy will return all rollup groups generated
  // and represented as null. They will not be filtered out which
  // they would if having pred is applied after rollup group materialization.
  // Maybe later we can optimize so this pushdown is done if possible,
  // for ex, if there are no 'is null/not null' having preds on grouping cols.
  if (NOT isRollup())
    {
      pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
                          getGroupAttr()->getCharacteristicInputs(),
                          selectionPred()
                         );
    }

  // ---------------------------------------------------------------------
  // Each scalar aggregate allocates a VEGRegion for "=" predicates that
  // do not reject null values.
  // It can only import a VEG from another VEGRegion in which an "outer
  // reference" is involved.
  //   select empname
  //   from employee
  //   where not exists
  //     (select branchnum
  //      from branch
  //      where ( employee.branchnum = branch.branchnum)
  //        and (branchnum = 1)
  //      group by branchnum
  //     );
  // It is legal to deduce that employee.branchnum = 1 within the subquery
  // but not in the main query.
  // ---------------------------------------------------------------------
  NABoolean needsNewVEGRegion = FALSE;
  if (groupExpr().isEmpty() && (NOT containsNullRejectingPredicates()))
    {
      needsNewVEGRegion = TRUE;
      normWARef.locateAndSetVEGRegion(this);
    }

  // ---------------------------------------------------------------------
  // Normalize the child.
  // ---------------------------------------------------------------------
  child(0) = child(0)->normalizeNode(normWARef);

  if (needsNewVEGRegion)
    normWARef.restoreOriginalVEGRegion();

  fixEssentialCharacteristicOutputs();

  if (CmpCommon::getDefault(CASCADED_GROUPBY_TRANSFORMATION) != DF_OFF)
    {
      checkForCascadedGroupBy(normWARef);
    }

  return this; // return a -> to self

} // GroupByAgg::normalizeNode()
// -----------------------------------------------------------------------
// GroupByAgg::semanticQueryOptimizeNode()
// This instance of the SQO virtual method is the same as the base class
// implementation except that it also keeps track of which
// VEGRegion we are currently in.
// -----------------------------------------------------------------------
RelExpr * GroupByAgg::semanticQueryOptimizeNode(NormWA & normWARef)
{
  // When there is no grouping list and no null-rejecting predicate, the
  // child is processed while normWARef points at the VEGRegion located
  // for this node; the original region is restored afterwards.
  const NABoolean switchVEGRegion =
    groupExpr().isEmpty() && (NOT containsNullRejectingPredicates());

  if (switchVEGRegion)
    normWARef.locateAndSetVEGRegion(this);

  // Run the SQO phase on the subtree below and hook the result back in.
  RelExpr *optimizedChild = child(0)->semanticQueryOptimizeNode(normWARef);
  child(0) = optimizedChild;

  if (switchVEGRegion)
    normWARef.restoreOriginalVEGRegion();

  // Merge with a child GroupBy that the normalizer marked for elimination.
  eliminateCascadedGroupBy(normWARef);

  return this;
} // GroupByAgg::semanticQueryOptimizeNode()
// This method checks if we can merge multiple group by nodes that are next
// to each other into a single group by, and then marks the group by node
// that can be eliminated so that it can be removed during the SQO phase.
// The following are the conditions under which a bottom GB node can be
// eliminated:
// 1) If the grouping columns of the top group by node are a subset of the
// grouping columns of the bottom group by node.
// 2) If all the aggregate expressions of the top group by can be rewritten to
// use the bottom values in such a way that it does not change the output.
// For now this method would handle the following aggregate expressions
// to be rolled up.
// SUM(SUM(a)) => SUM(a)
// SUM( COUNT(a)) => COUNT(a)
// SUM( COUNT(*)) => COUNT(*)
// MIN( MIN(a)) => MIN(a)
// MAX( MAX(a)) => MAX(a)
void GroupByAgg::checkForCascadedGroupBy(NormWA & normWARef)
{
  // Only a GroupBy sitting directly on top of another GroupBy qualifies.
  if (child(0)->getOperatorType() != REL_GROUPBY)
    return;

  GroupByAgg *lowerGB = (GroupByAgg*)(child(0)->castToRelExpr());

  // The lower GroupBy must group on (at least) all of my grouping columns
  // and must not carry selection predicates of its own.
  if (!lowerGB->groupExpr().contains(groupExpr()))
    return;
  if (!lowerGB->selectionPred().isEmpty())
    return;

  NABoolean rollupIsPossible = TRUE;

  for (ValueId aggId = aggregateExpr().init();
       aggregateExpr().next(aggId) && rollupIsPossible;
       aggregateExpr().advance(aggId))
    {
      CMPASSERT(aggId.getItemExpr()->isAnAggregate());
      Aggregate *upperAggr = (Aggregate *) aggId.getItemExpr();

      if (upperAggr->isDistinct() || !upperAggr->child(0)->isAnAggregate())
        {
          // Not a plain aggregate-of-aggregate. The only tolerated case is
          // a COUNT over a constant while compiling an MV descriptor; such
          // an aggregate is remembered for removal.
          if (normWARef.compilingMVDescriptor() &&
              upperAggr->getOperatorType() == ITM_COUNT &&
              upperAggr->child(0)->getOperatorType() == ITM_CONSTANT)
            aggrExprsToBeDeleted() += aggId;
          else
            rollupIsPossible = FALSE;
          continue;
        }

      Aggregate *lowerAggr =
        (Aggregate *) upperAggr->child(0)->castToItemExpr();

      // A DISTINCT aggregate underneath cannot be rolled up.
      if (lowerAggr->isDistinct())
        {
          rollupIsPossible = FALSE;
          continue;
        }

      // Accept only the (upper, lower) operator pairs that can be rolled up.
      switch (upperAggr->getOperatorType())
        {
        case ITM_SUM:
          // SUM(SUM(a)) and SUM(COUNT(..)) are the only SUM combinations
          if (!(upperAggr->child(0)->getOperatorType() == ITM_SUM ||
                upperAggr->child(0)->getOperatorType() == ITM_COUNT))
            rollupIsPossible = FALSE;
          break;

        case ITM_MIN:
          if (!(upperAggr->child(0)->getOperatorType() == ITM_MIN))
            rollupIsPossible = FALSE;
          break;

        case ITM_MAX:
          if (!(upperAggr->child(0)->getOperatorType() == ITM_MAX))
            rollupIsPossible = FALSE;
          break;

        case ITM_COUNT_NONULL:
          // tolerated only while compiling an MV descriptor, in which
          // case the aggregate is remembered for removal
          if (normWARef.compilingMVDescriptor())
            aggrExprsToBeDeleted() += aggId;
          else
            rollupIsPossible = FALSE;
          break;

        default:
          rollupIsPossible = FALSE;
          break;
        }
    }

  if (rollupIsPossible)
    {
      // Leave a mark for the SQO phase, which does the actual merge.
      lowerGB->setIsMarkedForElimination(TRUE);
      normWARef.setContainsGroupBysToBeEliminated(TRUE);
    }
  else
    aggrExprsToBeDeleted().clear();
}
// Merges this GroupBy with its child GroupBy when the child has been
// marked for elimination (see isMarkedForElimination()). Does nothing if
// the child is not a REL_GROUPBY or is not marked.
//
// Steps performed when the child is marked:
//  1) every aggregate recorded in aggrExprsToBeDeleted() has its ItemExpr
//     replaced by a SystemLiteral and is removed from aggregateExpr();
//  2) every remaining aggregate is re-pointed at its grandchild, and a
//     SUM over a COUNT is turned into a COUNT;
//  3) the child GroupBy is bypassed by linking child(0) to its own child.
void GroupByAgg::eliminateCascadedGroupBy(NormWA & normWARef)
{
if (child(0)->getOperatorType() == REL_GROUPBY)
{
GroupByAgg *childGB = (GroupByAgg*)(child(0)->castToRelExpr());
// buffer holding the value (1) used to build the replacement literals below
short value = 1;
if (childGB->isMarkedForElimination())
{
// Step 1: replace each aggregate scheduled for deletion by a literal
// built from 'value'. The literal is created with the type of the
// ValueId it replaces.
// NOTE(review): the literal is given sizeof(short) bytes regardless of
// the storage size of y.getType() -- confirm SystemLiteral copes with that.
for (ValueId y = aggrExprsToBeDeleted().init();
aggrExprsToBeDeleted().next(y);
aggrExprsToBeDeleted().advance(y))
{
ItemExpr *constValue = new (CmpCommon::statementHeap())
SystemLiteral(&(y.getType()), &value, sizeof(short));
y.replaceItemExpr(constValue);
constValue->synthTypeAndValueId();
}
// the replaced ValueIds no longer belong to the aggregate list
aggregateExpr() -= aggrExprsToBeDeleted();
aggrExprsToBeDeleted().clear();
// Step 2: collapse each remaining aggregate-of-aggregate.
for (ValueId x = aggregateExpr().init();
aggregateExpr().next(x);
aggregateExpr().advance(x))
{
CMPASSERT(x.getItemExpr()->isAnAggregate());
Aggregate *aggrExpr = (Aggregate *) x.getItemExpr();
// NOTE(review): this CMPASSERT has no trailing ';' and therefore relies
// on the macro expanding to a complete statement.
CMPASSERT(aggrExpr->child(0)->isAnAggregate())
if (aggrExpr->getOperatorType() == ITM_SUM &&
aggrExpr->child(0)->getOperatorType() == ITM_COUNT)
{
// SUM(COUNT(..)) becomes COUNT(..)
aggrExpr->setOperatorType(ITM_COUNT);
// Need to update the type as well
// NOTE(review): origSumType is only referenced by the commented-out
// Cast sketch at the end of this branch.
const NAType &origSumType = x.getType();
const NAType &origCountType = aggrExpr->child(0)->getValueId().getType();
// here we change the type of the old SUM(), now new COUNT() to that
// of the original count. This to prevent numeric overflow error.
// See solution: 10-100514-0329.
x.changeType(&origCountType);
// Ideally we should put in a cast node to cast the new count
// type back to the original sum type to maintain the properties
// of the original valueId, but since groupBys only outputs
// valueIds from the aggregateExpr or groupExpr we can't do this
// here. Cast is not an aggregate function, so it cannot go in the
// aggregate expression, and if we group by it we will change the
// the meaning of the groupby.
// so for now we assume we will be ok since Numeric(19) and largeInt
// are roughly equivalent.
// ItemExpr * castNode =
// new(newNormWA.wHeap()) Cast((x.getItemExpr(),
// &(origSumType));
}
// skip over the lower aggregate: aggregate directly on its operand
aggrExpr->child(0) = aggrExpr->child(0)->child(0);
}
// Step 3: bypass the child GroupBy node itself.
child(0) = child(0)->child(0);
}
}
}
// Prepare this GroupBy for sharing as a common subexpression: extend the
// characteristic outputs by the requested values and report the aggregate
// expressions as values for VEG rewrite. Always returns TRUE. Only
// outputsToAdd and valuesForVEGRewrite are referenced here.
NABoolean GroupByAgg::prepareMeForCSESharing(
     const ValueIdSet &outputsToAdd,
     const ValueIdSet &predicatesToRemove,
     const ValueIdSet &commonPredicatesToAdd,
     const ValueIdSet &inputsToRemove,
     ValueIdSet &valuesForVEGRewrite,
     ValueIdSet &keyColumns,
     CSEInfo *info)
{
  // Values this node can hand out: grouping columns plus aggregates.
  ValueIdSet producibleHere(groupExpr_);
  producibleHere += aggregateExpr_;

  valuesForVEGRewrite += aggregateExpr_;

  // The requested outputs may be arbitrary expressions (over grouping
  // columns, aggregates, even other tables), so let isCovered() pick
  // out the parts that can be produced at this node.
  ValueIdSet valuesReferenced;
  ValueIdSet coveredOutputs;
  ValueIdSet notCovered;

  NABoolean everythingCovered =
    outputsToAdd.isCovered(producibleHere,
                           *(getGroupAttr()),
                           valuesReferenced,
                           coveredOutputs,
                           notCovered);

  // If the whole request is covered, output it exactly as requested.
  if (everythingCovered)
    coveredOutputs = outputsToAdd;

  getGroupAttr()->addCharacteristicOutputs(coveredOutputs);

  return TRUE;
}
// ***********************************************************************
// $$$$ Scan
// member functions for class Scan
// ***********************************************************************
// -----------------------------------------------------------------------
// Scan::transformNode()
// -----------------------------------------------------------------------
void Scan::transformNode(NormWA & normWARef,
                         ExprGroupId & locationOfPointerToMe)
{
  CMPASSERT( this == locationOfPointerToMe );

  // a node is transformed at most once
  if (nodeIsTransformed())
    return;
  markAsTransformed();

  // ---------------------------------------------------------------------
  // Transform the entire column list of the base table to pick up
  // equivalences of base table columns and index columns
  // ---------------------------------------------------------------------
  const ValueIdList &allCols = getTableDesc()->getColumnList();
  ItemExpr *oldPtr;
  ExprValueId newPtr;

  for (CollIndex i = 0; i < allCols.entries(); i++)
    {
      oldPtr = allCols[i].getItemExpr();
      newPtr = oldPtr;
      oldPtr->transformNode(normWARef, newPtr, locationOfPointerToMe,
                            getGroupAttr()->getCharacteristicInputs());

      // the column list shouldn't be changed by the transformation
      CMPASSERT(oldPtr == newPtr.getPtr());

      // -------------------------------------------------------------------
      // Create a VEG with all equivalent index columns and equivalent
      // columns
      // -------------------------------------------------------------------
      if (oldPtr->getOperatorType() == ITM_BASECOLUMN)
        {
          const ValueIdSet &eic = ((BaseColumn *)oldPtr)->getEIC();

          for (ValueId eqVid = eic.init(); eic.next(eqVid); eic.advance(eqVid))
            {
              normWARef.addVEG(((BaseColumn *)oldPtr)->getValueId(),eqVid);
            }

          // If it is a nullable clustering key column and there are indexes
          // then set the special nulls flag to TRUE so that during an index
          // join the equality predicate between the clustering key
          // of the base and the index does return NULL equals NULL
          // as TRUE and so finds the base table row in the index table.
          //
          // The cheap tests (nullable, has equivalent index columns) are
          // made first, so the clustering key is searched only for columns
          // that can actually need the flag.
          if (allCols[i].getType().supportsSQLnull() &&
              eic.entries() > 0)
            {
              // Bound by const reference: the key list is only read here,
              // so there is no need to copy it for every column.
              const ValueIdList &ckColumns =
                getTableDesc()->getClusteringIndex()->getIndexKey();

              // check if this is a clustering key column
              NABoolean isClusteringKeyColumn = FALSE;
              for (CollIndex j = 0; j < ckColumns.entries(); j++)
                {
                  if (allCols[i].getNAColumn()->getPosition() ==
                      ckColumns[j].getNAColumn()->getPosition())
                    {
                      isClusteringKeyColumn = TRUE;
                      break;
                    }
                }

              if (isClusteringKeyColumn)
                {
                  ItemExpr * vegrefPtr = normWARef.getVEGReference(allCols[i]);
                  if (vegrefPtr)
                    ((VEGReference *)vegrefPtr)->getVEG()->setSpecialNulls(TRUE);
                }
            }
        }
      else
        // every entry of the column list is expected to be a base column
        CMPASSERT(oldPtr->getOperatorType() == ITM_BASECOLUMN);
    }

  // transform the selection predicates
  transformSelectPred(normWARef, locationOfPointerToMe);
} // Scan::transformNode()
// -----------------------------------------------------------------------
// Scan::rewriteNode()
// -----------------------------------------------------------------------
void Scan::rewriteNode(NormWA & normWARef)
{
  // ---------------------------------------------------------------------
  // Normalize every column of the table and record the normalized
  // ValueId in the ColumnVEGList of the table descriptor.
  // ---------------------------------------------------------------------
  const ValueIdList &tableCols = getTableDesc()->getColumnList();
  CollIndex pos = 0;

  for (pos = 0; pos < tableCols.entries(); pos++)
    {
      ItemExpr *normalizedCol =
        tableCols[pos].getItemExpr()->normalizeNode(normWARef);
      getTableDesc()->addToColumnVEGList(normalizedCol->getValueId());
    }

  // ---------------------------------------------------------------------
  // Indexes: normalize the asc/desc key order and, for partitioned
  // indexes, the partitioning keys of the partitioning function.
  // ---------------------------------------------------------------------
  for (pos = 0;
       pos < (Int32)getTableDesc()->getIndexes().entries();
       pos++)
    {
      IndexDesc *index = getTableDesc()->getIndexes()[pos];

      ValueIdList keyOrder(index->getOrderOfKeyValues());
      keyOrder.normalizeNode(normWARef);
      index->setOrderOfKeyValues(keyOrder);

      if (index->isPartitioned())
        index->getPartitioningFunction()->normalizePartitioningKeys(normWARef);
    }

  // ---------------------------------------------------------------------
  // Vertical partitions: same treatment, except that the partitioning
  // function is used unconditionally (vertically partitioned tables
  // always have one, even with a single horizontal partition).
  // ---------------------------------------------------------------------
  for (pos = 0;
       pos < (Int32)getTableDesc()->getVerticalPartitions().entries();
       pos++)
    {
      IndexDesc *vertPart = getTableDesc()->getVerticalPartitions()[pos];

      ValueIdList keyOrder(vertPart->getOrderOfKeyValues());
      keyOrder.normalizeNode(normWARef);
      vertPart->setOrderOfKeyValues(keyOrder);

      vertPart->getPartitioningFunction()->normalizePartitioningKeys(normWARef);
    }

  // QSTUFF
  // the potential outputs are normalized here as well, to avoid
  // problems during code generation
  potentialOutputs_.normalizeNode(normWARef);
  // QSTUFF

  // ---------------------------------------------------------------------
  // Finally let the base class rewrite the selection predicates and
  // the Group Attributes.
  // ---------------------------------------------------------------------
  RelExpr::rewriteNode(normWARef);
} // Scan::rewriteNode()
// -----------------------------------------------------------------------
// Scan::recomputeOuterReferences()
// -----------------------------------------------------------------------
// void Scan::recomputeOuterReferences()
//
// No virtual method needed
//
// Scan::recomputeOuterReferences()
// -----------------------------------------------------------------------
// Scan::normalizeNode()
// -----------------------------------------------------------------------
// Normalizes this Scan node. Beyond the base class normalization it
//  1) optionally rewrites the selection predicates through the RangeSpec
//     transformation (controlled by RANGESPEC_TRANSFORMATION),
//  2) tries to turn qualifying OR predicates into a semijoin between this
//     scan and a TupleList,
//  3) normalizes the expressions of computed columns, and
//  4) hands the final selection predicates to the selectivity and
//     cardinality hints of the table, if present.
// Returns this node, or the node that newJoin refers to if step 2
// replaced the scan by a join tree. A node that is already normalized is
// returned unchanged.
RelExpr * Scan::normalizeNode
( NormWA & normWARef )
{
if (nodeIsNormalized())
return this;
RelExpr::normalizeNode(normWARef);
// ---------------------------------------------------------------------
// Step 1: RangeSpec transformation of the selection predicates.
// Skipped when the default is OFF or during MV query rewrite.
// ---------------------------------------------------------------------
if(CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) != DF_OFF &&
!normWARef.inMVQueryRewrite())
{
ValueIdSet vs,vs1;
ValueId exprId;
ItemExpr *inputItemExprTree = NULL;
// rebuild the predicate set as one AND tree
ValueIdList selectionPredList(selectionPred());
inputItemExprTree = selectionPredList.rebuildExprTree(ITM_AND,FALSE,FALSE);
CollHeap *heap = normWARef.wHeap();
QRDescGenerator* descGenerator = new (heap) QRDescGenerator(false, heap);
if (CmpCommon::getDefault(MVQR_LOG_QUERY_DESCRIPTORS) == DF_DUMP_MV)
// Used for generating MV descriptors for queries in workload analysis mode.
descGenerator->setDumpMvMode();
// Desc generator needs equality sets or mvqr won't set range bitmap
// correctly for equijoin operands with additional range predicates.
descGenerator->createEqualitySets(selectionPred());
ItemExpr *result = NULL;
ItemExpr *ie = NULL ;
if( inputItemExprTree != NULL )
{
// transStatus is passed by name to the call below and tested afterwards;
// the predicates are only replaced when it comes back TRUE
NABoolean transStatus = FALSE;
result = applyAssociativityAndCommutativity(descGenerator,heap,
inputItemExprTree, normWARef,
transStatus);
if(transStatus)
{
// result->synthTypeAndValueId(); // You can not remove it, it causes regression (however Bob told in Code review) for case core/test029
// delete from T29xv2 where j like 'f%'; -- ok ->ValueId of AND node is not available.
// split the transformed tree back into its AND-ed conjuncts
result->convertToValueIdSet(vs, NULL, ITM_AND);
if(CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_MINIMUM )
{
// MINIMUM mode: removeRangeSpecItems() is applied to every conjunct.
// If what comes back is an AND whose first operand is a <, <=, >, >=
// comparison that is not a derivative of LIKE, its two operands are
// added as separate predicates; otherwise the expression is added
// as a whole.
for (exprId = vs.init(); vs.next(exprId); vs.advance(exprId))
{
ie = exprId.getItemExpr()->removeRangeSpecItems(&normWARef);
if (ie->getOperatorType() == ITM_AND)
{
OperatorTypeEnum op = ie->child(0)->getOperatorType();
if ( (op == ITM_GREATER_EQ) ||(op == ITM_GREATER) ||
(op == ITM_LESS) ||(op == ITM_LESS_EQ))
{
if(!((BiRelat*)ie->child(0).getPtr())->derivativeOfLike())
{
vs1.insert(ie->child(0)->getValueId());
vs1.insert(ie->child(1)->getValueId());
continue ;
}
}
}
vs1.insert(ie->getValueId());
}
// vs now becomes the set collected in vs1
vs.clear();
vs += vs1 ;
}
//doNotReplaceAnItemExpressionForLikePredicates(result,vs,result);
vs.normalizeNode(normWARef);
setSelectionPredicates(vs);
// For testing purpose:
// ValueIdList selectionPredList1(vs);
// ItemExpr * inputItemExprTree0 = selectionPredList1.rebuildExprTree(ITM_AND,FALSE,FALSE);
// oldTree = revertBackToOldTree(heap,inputItemExprTree0);
// oldTree->convertToValueIdSet(leafs, NULL, ITM_AND);
// doNotReplaceAnItemExpression(oldTree,leafs,oldTree);
}
}
}
// ---------------------------------------------------------------------
// Step 2: OR predicate to semijoin transformation.
// ---------------------------------------------------------------------
// the following block of code can transform an OR predicate into
// semijoin(Scan, TupleList)
// where the Scan is this scan node.
// The transformation is in general guarded by tight heuristics
// so that OR preds can be evaluated using a hash table (code in generator)
// selection preds of a scan node can be affected by this code block.
ValueIdSet & preds = selectionPred();
ValueId exprId;
ItemExprList valuesListIE(normWARef.wHeap());
BindWA bindWA(ActiveSchemaDB(), CmpCommon::context());
// newJoin starts out referring to this scan; the transformNode() call in
// the loop below receives it and may make it refer to a new join node
ExprGroupId newJoin = this;
ItemExpr *retItemExpr = NULL;
ValueId colVid;
// NOTE(review): numParams is not initialized here; presumably it is an
// output parameter of canTransformToSemiJoin() -- confirm.
Lng32 numParams;
// the loop is skipped entirely when getMergeUpdDelCount() is non-zero
if (normWARef.getMergeUpdDelCount() == 0)
for (exprId = preds.init(); preds.next(exprId); preds.advance(exprId))
{
if (exprId.getItemExpr()->canTransformToSemiJoin(valuesListIE,
getTableDesc(), numParams, colVid, normWARef.wHeap()))
{
// it is an OR pred. that meets the basic correctness conditions
if (!passSemiJoinHeuristicCheck(exprId, valuesListIE.entries(), numParams, colVid))
{
continue; // did not satisfy heuristics
}
// build "colVid = ANY (tuple list of the collected values)"
TupleList * tl = new(normWARef.wHeap())
TupleList(valuesListIE.convertToItemExpr(RIGHT_LINEAR_TREE));
tl->setCreatedForInList(TRUE);
RelRoot * rr = new (normWARef.wHeap()) RelRoot(tl);
retItemExpr = new (normWARef.wHeap())
QuantifiedComp(ITM_EQUAL_ANY, colVid.getItemExpr(), rr, FALSE);
((QuantifiedComp*)retItemExpr)->setCreatedFromINlist(TRUE);
retItemExpr->bindNode(&bindWA);
if(bindWA.errStatus())
{
// binding failed: drop the diagnostics and leave this predicate as it is
// NOTE(review): diags()->clear() is not limited to the entries added by
// this bind attempt -- confirm nothing else is expected to be in there.
CmpCommon::diags()->clear();
bindWA.resetErrStatus();
continue ;
}
// transform the quantified comparison, passing newJoin along
ExprValueId nePtr(retItemExpr);
retItemExpr->transformNode(normWARef, nePtr,
newJoin, getGroupAttr()->getCharacteristicInputs());
// keep the original predicate unless a semi TSJ was produced
if(!(newJoin->getOperator().match(REL_SEMITSJ)))
continue ;
// is an OR pred that passed the heuristics check
preds.remove(exprId);
}
}
// we have changed the tree and introduced at least one semijoin.
if ((RelExpr *)newJoin != this)
{
// the new top node takes over the characteristic outputs and inputs
// of this scan
((RelExpr *)newJoin)->getGroupAttr()->setCharacteristicOutputs
(getGroupAttr()->getCharacteristicOutputs());
((RelExpr *)newJoin)->getGroupAttr()->setCharacteristicInputs
(getGroupAttr()->getCharacteristicInputs());
primeGroupAttributes();
getGroupAttr()->normalizeInputsAndOutputs(normWARef);
ExprGroupId eg(newJoin);
newJoin->transformNode(normWARef,eg);
newJoin = newJoin->normalizeNode(normWARef);
}
// ---------------------------------------------------------------------
// Step 3: computed columns.
// ---------------------------------------------------------------------
TableDesc * tableDesc = getTableDesc();
// Make sure we rewrite the computedColumn Expressions
const ValueIdList &allCols = tableDesc->getColumnList();
ItemExpr *iePtr;
CollIndex i = 0;
for (i = 0; i < allCols.entries(); i++)
{
iePtr = allCols[i].getItemExpr();
// every entry of the column list is treated as a BaseColumn here
if (((BaseColumn *) iePtr)->getNAColumn()->isComputedColumn())
{
BaseColumn *bc = ((BaseColumn *) iePtr);
ItemExpr *ccExpr = bc->getComputedColumnExpr().getItemExpr();
ccExpr = ccExpr->normalizeNode(normWARef);
bc->setComputedColumnExpr(ccExpr->getValueId());
}
}
// ---------------------------------------------------------------------
// Step 4: pass the final selection predicates to the hints, if any.
// ---------------------------------------------------------------------
SelectivityHint * selHint = tableDesc->selectivityHint();
if (selHint)
{
selHint->setLocalPreds(getSelectionPredicates());
}
CardinalityHint * cardHint = tableDesc->cardinalityHint();
if (cardHint)
{
cardHint->setLocalPreds(getSelectionPredicates());
}
// this scan, or the join tree that replaced it in step 2
return ((RelExpr *)newJoin);
} // Scan::normalizeNode()
// Prepare this Scan for sharing as a common subexpression: add the
// producible part of the requested outputs to the characteristic outputs,
// report the table columns as values for VEG rewrite and report the
// clustering key columns. Always returns TRUE. Only outputsToAdd,
// valuesForVEGRewrite and keyColumns are referenced here.
NABoolean Scan::prepareMeForCSESharing(
     const ValueIdSet &outputsToAdd,
     const ValueIdSet &predicatesToRemove,
     const ValueIdSet &commonPredicatesToAdd,
     const ValueIdSet &inputsToRemove,
     ValueIdSet &valuesForVEGRewrite,
     ValueIdSet &keyColumns,
     CSEInfo *info)
{
  // Values this scan can hand out: the VEG columns of the table.
  ValueIdSet tableVEGCols(getTableDesc()->getColumnVEGList());

  // The requested outputs may be expressions on columns, possibly
  // spanning several tables; isCovered() extracts the subexpressions
  // that can be produced by this scan. Its return value is not needed,
  // only the covered subexpressions are.
  ValueIdSet colsReferenced;
  ValueIdSet coveredOutputs;
  ValueIdSet notCovered;

  outputsToAdd.isCovered(tableVEGCols,
                         *(getGroupAttr()),
                         colsReferenced,
                         coveredOutputs,
                         notCovered);

  getGroupAttr()->addCharacteristicOutputs(coveredOutputs);

  // Report the base columns and the clustering key back to the caller.
  valuesForVEGRewrite.insertList(getTableDesc()->getColumnList());
  keyColumns.insertList(
       getTableDesc()->getClusteringIndex()->getIndexKey());

  return TRUE;
}
/* This method applies a long list of heuristics to determine whether
it is better to use a semijoin to evaluate the OR pred or whether we should
wait until the generator and use the hash table implementation.
OR_PRED_TO_SEMIJOIN = 0 ==> semijoin trans is turned OFF
OR_PRED_TO_SEMIJOIN = <val1> ==> semijoin trans kicks in if
a. hash table transformation does not apply for some reason and
b. number of literals in OR pred > <val1>
default is 25.
OR_PRED_TO_JUMPTABLE = 0 ==> hash table trans is turned OFF in generator
OR_PRED_TO_JUMPTABLE = <val2> ==> hash table implementation shuts OFF for IN lists
larger than this size. Default value is 5,000.
OR_PRED_TO_SEMIJOIN_TABLE_MIN_SIZE : The key column heuristic applies only if the table
has more rows than this setting. Default is 10000. The key column heuristic says that the
semijoin transformation can give a good plan only if the number of rows read by the
incoming probes is less than a small fraction of a big table.
OR_PRED_TO_SEMIJOIN_PROBES_MAX_RATIO : Relevant only to the key column heuristic.
This default specifies the fraction mentioned in the previous paragraph.
The default value is 0.10. Currently join preds on key columns and multiple IN
lists on key columns are not handled well by the key col heuristic.
The other heuristic checked here relates to the partitioning key. If the IN list size
is less than half the number of partitions and the partitioning key is covered by
equality preds then we figure that it is better to do the semijoin transformation and
open only a few partitions. Opening a few partitions and sending on avg. one probe to each
one (total number of probes is guaranteed to be less than half the number of partitions)
is better than opening all the partitions and scanning the entire table once.
The first argument vid is the giant OR predicate that we already know meets all
logical criteria for transformation to semijoin.
*/
// Decides whether an OR predicate on this scan should be transformed
// into a semijoin. Returns TRUE if one of the heuristics below votes for
// the semijoin, FALSE otherwise (including when OR_PRED_TO_SEMIJOIN is 0).
//
// vid       - the OR predicate (not referenced in this function body)
// numValues - number of values in the IN list / OR predicate
// numParams - number of parameters among those values
// colVid    - the column the OR predicate is on
NABoolean Scan::passSemiJoinHeuristicCheck(ValueId vid, Lng32 numValues,
Lng32 numParams, ValueId colVid) const
{
// read the defaults that steer the heuristics
Lng32 orPredToSemiJoin =
ActiveSchemaDB()->getDefaults().getAsLong(OR_PRED_TO_SEMIJOIN);
Lng32 orPredToJumpTable =
ActiveSchemaDB()->getDefaults().getAsLong(OR_PRED_TO_JUMPTABLE);
Lng32 orPredToSemiJoinTableMinSize =
ActiveSchemaDB()->getDefaults().getAsLong(OR_PRED_TO_SEMIJOIN_TABLE_MIN_SIZE);
float orPredToSemiJoinMaxRatio ;
ActiveSchemaDB()->getDefaults().getFloat(OR_PRED_TO_SEMIJOIN_PROBES_MAX_RATIO,
orPredToSemiJoinMaxRatio);
if (orPredToSemiJoin == 0) // feature is turned OFF
return FALSE;
// if pcode is not available then the hash table implementation does not
// apply. Be more aggressive with semijoin trans.
DefaultToken pcodeOptLevel = CmpCommon::getDefault(PCODE_OPT_LEVEL);
NABoolean unSupportedType = FALSE;
NABoolean noPCodeSupport = FALSE;
UInt32 optFlags = (UInt32)CmpCommon::getDefaultLong(PCODE_OPT_FLAGS);
if (((optFlags & PCodeCfg::INDIRECT_BRANCH) == 0) ||
(pcodeOptLevel == DF_OFF) || (pcodeOptLevel == DF_MINIMUM))
{
noPCodeSupport = TRUE;
}
// numeric columns that are BigNum, decimal or have a scale are flagged
// as unsupported for the hash table path
if (colVid.getType().getTypeQualifier() == NA_NUMERIC_TYPE)
{
const NumericType &ntype = (NumericType &)colVid.getType() ;
if (ntype.isBigNum() || ntype.isDecimal() || (ntype.getScale() > 0))
unSupportedType = TRUE;
}
// ---------------------------------------------------------------------
// Heuristic 1: the list is long enough and the hash table
// implementation cannot be used.
// ---------------------------------------------------------------------
if (numValues > orPredToSemiJoin) // num of in list values still has to be
{ // greater than OR_PRED_TO_SEMIJOIN
if ( noPCodeSupport ||
(orPredToJumpTable == 0) || (orPredToJumpTable < numValues)|| // hash table imp. is OFF or In list VERY large
(numParams > orPredToSemiJoin) || // params not supported hash table imp.
unSupportedType )
return TRUE;
}
// the remaining heuristics distinguish big tables from small ones
// NOTE(review): this indexes getTableColStats()[0] without checking that
// the list has an entry -- presumably column statistics always exist at
// this point; confirm.
NABoolean isBigTable = FALSE;
CostScalar totalRowCount = getTableDesc()->getTableColStats()[0]->getColStats()->getRowcount();
if (totalRowCount > orPredToSemiJoinTableMinSize)
isBigTable = TRUE;
// We do cycle through all indexes of the base table though
// there is no guarantee that the index we base our decision upon here
// will be chosen by the optimizer.
const LIST(IndexDesc *) & ixlist = getTableDesc()->getIndexes();
for (CollIndex ix =0; ix < ixlist.entries(); ix++)
{
IndexDesc* idesc = ixlist[ix];
ValueIdList keyCols, partKeyCols;
getTableDesc()->getEquivVEGCols(idesc->getIndexKey(), keyCols);
getTableDesc()->getEquivVEGCols(idesc->getPartitioningKey(), partKeyCols);
// position of the OR column in the index key / partitioning key, or
// NULL_COLL_INDEX if it is not part of that key
CollIndex keyColIndex = keyCols.index(colVid);
CollIndex partKeyColIndex = partKeyCols.index(colVid);
// -------------------------------------------------------------------
// Heuristic 2: partitioning key heuristic.
// -------------------------------------------------------------------
if (partKeyColIndex != NULL_COLL_INDEX) // 'a' is a partitioning key column
{
NABoolean applyPartKeyHeuristic = FALSE;
if ((numValues < 0.5*(idesc->getNAFileSet()->getCountOfPartitions())) &&
isBigTable && (numValues > orPredToSemiJoin))
{
// number of clauses in IN List is less than half the number of partitions
applyPartKeyHeuristic = TRUE;
}
// every other partitioning key column must evaluate to a constant;
// the loop stops as soon as one does not
for (CollIndex i =0;
(applyPartKeyHeuristic && (i < partKeyCols.entries())); i++)
{
if (i == partKeyColIndex)
continue ;
if (!partKeyCols[i].getItemExpr()->doesExprEvaluateToConstant(FALSE,TRUE)) // equality preds on all part key columns, except 'a'
{
applyPartKeyHeuristic = FALSE;
}
}
if (applyPartKeyHeuristic)
return TRUE;
}
// -------------------------------------------------------------------
// Heuristic 3: key column heuristic (big tables only).
// -------------------------------------------------------------------
ItemExpr* ie;
if ((keyColIndex != NULL_COLL_INDEX)&& isBigTable) // 'a' is a key column of this index
{
// fullKeyConstant: all other key columns evaluate to a constant
// keyConstantUptoCol: all key columns before 'a' evaluate to a constant
NABoolean fullKeyConstant = TRUE;
NABoolean keyConstantUptoCol = TRUE;
for (CollIndex i =0; i < keyCols.entries(); i++)
{
if (i == keyColIndex)
continue ;
ie = keyCols[i].getItemExpr();
if (!(ie->doesExprEvaluateToConstant(FALSE,TRUE))) // equality preds on all key columns
{
if (i < keyColIndex)
{
fullKeyConstant = FALSE;
keyConstantUptoCol = FALSE;
}
else
{
fullKeyConstant = FALSE;
}
// the first non-constant key column decides both flags
break;
}
}
if (fullKeyConstant)
return TRUE;
if (keyConstantUptoCol && (numValues > orPredToSemiJoin))
return TRUE;
// the following block separates out the key predicates from the selection
// preds of this scan. Then we estimated the number of rows that will
// result after applying these key predicates. Only local predicates are
// considered. OR preds on key columns, join preds on key columns, etc.
// are not included in this computation. Hopefully these pred types can also be
// considered eventually. Code is mostly a copy of AppliedStatMan::getStatsForCANodeId()
ValueIdSet nonKeyPredicates (getSelectionPred());
ValueIdSet externalInputs = getGroupAttr()->getCharacteristicInputs();
ValueIdSet nonKeyColumnSet;
idesc->getNonKeyColumnSet(nonKeyColumnSet);
SearchKey * skey = new(CmpCommon::statementHeap())
SearchKey (idesc->getIndexKey(),
idesc->getOrderOfKeyValues(),
externalInputs, TRUE,
nonKeyPredicates,
nonKeyColumnSet,
idesc);
// build a temporary scan that carries only the key predicates and ask
// its group attributes for the estimated result cardinality
const CorrName& name = getTableDesc()->getNATable()->getTableName();
Scan *scanExpr = new STMTHEAP Scan(name, getTableDesc(), REL_SCAN, STMTHEAP);
scanExpr->setBaseCardinality((Cardinality)totalRowCount.getValue()) ;
GroupAttributes * gaExpr = new STMTHEAP GroupAttributes();
scanExpr->setSelectionPredicates(skey->keyPredicates());
gaExpr->setCharacteristicOutputs(getGroupAttr()->getCharacteristicOutputs());
scanExpr->setGroupAttr(gaExpr);
gaExpr->setLogExprForSynthesis(scanExpr);
EstLogPropSharedPtr outputEstLogProp = scanExpr->getGroupAttr()->outputLogProp((*GLOBAL_EMPTY_INPUT_LOGPROP));
CostScalar keyPredRowCount = outputEstLogProp->getResultCardinality() ;
// the temporary objects are no longer needed once the estimate is taken
delete skey;
delete scanExpr; // gaExpr is deleted here too
if (( keyPredRowCount < ((CostScalar)orPredToSemiJoinMaxRatio)*totalRowCount)&&
(numValues > orPredToSemiJoin))
return TRUE; // ratio of rows chosen by keypreds is less than specified
// by the default OR_PRED_TO_SEMIJOIN_PROBES_MAX_RATIO
} // end of isBigTable IF block
} // end of loop over all index paths
// part key and key column heuristic did not apply
return FALSE;
}
// ***********************************************************************
// $$$$ Tuple
// methods for class Tuple
// ***********************************************************************
// ***********************************************************************
// $$$$ GenericUpdate
// member functions for class GenericUpdate
// ***********************************************************************
void GenericUpdate::transformNode(NormWA & normWARef,
ExprGroupId & locationOfPointerToMe)
{
CMPASSERT( this == locationOfPointerToMe );
if (nodeIsTransformed())
return;
markAsTransformed();
// ---------------------------------------------------------------------
// Transform the child,
// unless it's a leaf op introduced by Binder Index Maintenance.
// ---------------------------------------------------------------------
ValueId val_id;
if (child(0)) {
// Make values available to child
child(0)->getGroupAttr()->addCharacteristicInputs
(getGroupAttr()->getCharacteristicInputs());
child(0)->transformNode(normWARef, child(0));
} else
CMPASSERT(getOperator().match(REL_ANY_LEAF_GEN_UPDATE));
// only if update and scan on the same table,
// i.e. no temp tables are involved
if (((getOperatorType() == REL_UNARY_UPDATE ||
getOperatorType() == REL_UNARY_DELETE))){
if(child(0)->getOperatorType() == REL_SCAN) {
Scan * scanNode = (Scan *)(child(0)->castToRelExpr());
const NATable *scanTable = scanNode->getTableDesc()->getNATable();
if(scanTable->getSpecialType() != ExtendedQualName::TRIGTEMP_TABLE){
ValueIdList topValueIds = oldToNewMap().getTopValues();
ValueIdList bottomValueIds = oldToNewMap().getBottomValues();
for (CollIndex v = 0; v < topValueIds.entries();v++){
normWARef.addVEG(topValueIds[v],bottomValueIds[v]);
}
}
}
}
// ---------------------------------------------------------------------
// Transform the computable expressions associated with me.
// If a subquery appears in the compute list, then let the subquery
// transformation cause a semijoin to be performed between the
// child of the GenericUpdate and the GenericUpdate.
// ---------------------------------------------------------------------
NABoolean origInGenericUpdateAssignFlag(normWARef.inGenericUpdateAssign());
normWARef.setInGenericUpdateAssign(TRUE);
if (newRecExpr().transformNode(normWARef, child(0),
getGroupAttr()->getCharacteristicInputs(),
FALSE /* Move predicates */) )
{
normWARef.setInGenericUpdateAssign(origInGenericUpdateAssignFlag) ;
// -----------------------------------------------------------------
// Transform my new child.
// -----------------------------------------------------------------
child(0)->transformNode(normWARef, child(0));
}
normWARef.setInGenericUpdateAssign(origInGenericUpdateAssignFlag) ;
normWARef.setInGenericUpdateAssign(TRUE);
// QSTUFF
if (newRecBeforeExpr().transformNode(normWARef, child(0),
getGroupAttr()->getCharacteristicInputs(),
FALSE /* Move predicates */) )
{
normWARef.setInGenericUpdateAssign(origInGenericUpdateAssignFlag) ;
// -----------------------------------------------------------------
// Transform my new child.
// -----------------------------------------------------------------
child(0)->transformNode(normWARef, child(0));
}
normWARef.setInGenericUpdateAssign(origInGenericUpdateAssignFlag) ;
// QSTUFF
if (isMerge())
{
normWARef.setInGenericUpdateAssign(TRUE) ;
if (mergeInsertRecExpr().transformNode(normWARef, child(0),
getGroupAttr()->getCharacteristicInputs(),
FALSE /* Move predicates */) )
{
normWARef.setInGenericUpdateAssign(origInGenericUpdateAssignFlag) ;
// -----------------------------------------------------------------
// Transform my new child.
// -----------------------------------------------------------------
child(0)->transformNode(normWARef, child(0));
}
normWARef.setInGenericUpdateAssign(origInGenericUpdateAssignFlag) ;
// remember previous "are we in mergeUpdateWhere?" flag
NABoolean origInMergeUpdWhere(normWARef.inMergeUpdWhere());
normWARef.setInMergeUpdWhere(TRUE); // we're in a mergeUpdateWhere
if (mergeUpdatePred().transformNode
(normWARef, child(0), getGroupAttr()->getCharacteristicInputs(),
FALSE /* Move predicates */) )
{ // restore previous "are we in mergeUpdateWhere?" flag
normWARef.setInMergeUpdWhere(origInMergeUpdWhere) ;
// Transform my new child.
child(0)->transformNode(normWARef, child(0));
}
// restore previous "are we in mergeUpdateWhere?" flag
normWARef.setInMergeUpdWhere(origInMergeUpdWhere) ;
}
ValueId exprId;
for (exprId = newRecExpr().init(); newRecExpr().next(exprId); newRecExpr().advance(exprId))
{
ItemExpr *thisIE = exprId.getItemExpr();
thisIE = thisIE->removeOneRowAggregate( thisIE, normWARef );
}
// QSTUFF
for (exprId = newRecBeforeExpr().init(); newRecBeforeExpr().next(exprId); newRecBeforeExpr().advance(exprId))
{
ItemExpr *thisIE = exprId.getItemExpr();
thisIE = thisIE->removeOneRowAggregate( thisIE, normWARef );
}
// QSTUFF
for (exprId = mergeInsertRecExpr().init(); mergeInsertRecExpr().next(exprId); mergeInsertRecExpr().advance(exprId))
{
ItemExpr *thisIE = exprId.getItemExpr();
thisIE = thisIE->removeOneRowAggregate( thisIE, normWARef );
}
for (exprId = mergeUpdatePred().init();
mergeUpdatePred().next(exprId);
mergeUpdatePred().advance(exprId))
{
ItemExpr *thisIE = exprId.getItemExpr();
thisIE = thisIE->removeOneRowAggregate( thisIE, normWARef );
}
// ---------------------------------------------------------------------
// For key expressions only normalize the right hand side of the =
// left side should have been a different valueId from the one below
// ---------------------------------------------------------------------
ValueIdList keyList = beginKeyPred();
if (keyList.entries() > 0)
{
for (CollIndex i = 0; i < keyList.entries(); i++)
{
ItemExpr * eqPtr = ((keyList[i]).getValueDesc())->getItemExpr();
(*eqPtr)[1]->transformNode(normWARef, eqPtr->child(1), child(0),
getGroupAttr()->getCharacteristicInputs());
(*eqPtr)[0]->markAsTransformed();
eqPtr->markAsTransformed();
}
}
// ---------------------------------------------------------------------
// ---------------------------------------------------------------------
beginKeyPred().transformNode(normWARef, child(0),
getGroupAttr()->getCharacteristicInputs());
// ---------------------------------------------------------------------
// Transform the check constraint expressions.
// Indicate that we are processing a complex scalar expression to
// suppress the performance of transitive closure.
// ---------------------------------------------------------------------
normWARef.setComplexScalarExprFlag();
normWARef.setInConstraintsFlag();
checkConstraints().transformNode(normWARef, child(0),
getGroupAttr()->getCharacteristicInputs());
normWARef.restoreComplexScalarExprFlag();
normWARef.restoreInConstraintsFlag();
// There should be no select predicates here,
// except if it's an embedded insert.
if (!getGroupAttr()->isEmbeddedInsert())
{
CMPASSERT(selectionPred().isEmpty()
// QSTUFF
OR getGroupAttr()->isGenericUpdateRoot()
// QSTUFF
);
}
// fix CR: message bytes increase with rowsets (CR 10-010720-4032)
if ( child(0) )
child(0)->recomputeOuterReferences();
// QSTUFF
if (!selectionPred().isEmpty()){
transformSelectPred(normWARef, locationOfPointerToMe);
}
// QSTUFF
// ---------------------------------------------------------------------
// Transform the entire column list of the base table to pick up
// equivalences of base table columns and index columns
// ---------------------------------------------------------------------
const ValueIdList &allCols = getTableDesc()->getColumnList();
ItemExpr *oldPtr;
ExprValueId newPtr;
ValueId eqVid;
CollIndex i = 0;
for (i = 0; i < allCols.entries(); i++) {
oldPtr = allCols[i].getItemExpr();
newPtr = oldPtr;
oldPtr->transformNode(normWARef, newPtr, locationOfPointerToMe,
getGroupAttr()->getCharacteristicInputs());
// the column list shouldn't be changed by the transformation
CMPASSERT(oldPtr == newPtr.getPtr());
// ---------------------------------------------------------------------
// Create a VEG with all equivalent index columns
// ---------------------------------------------------------------------
if (oldPtr->getOperatorType() == ITM_BASECOLUMN) {
const ValueIdSet &eic = ((BaseColumn *)oldPtr)->getEIC();
for (eqVid = eic.init(); eic.next(eqVid); eic.advance(eqVid)) {
// for trigger temp tables
if (updatedTableName_.getSpecialType() == ExtendedQualName::TRIGTEMP_TABLE &&
getOperatorType() == REL_LEAF_INSERT) {
normWARef.addVEGInOuterRegion(((BaseColumn *)oldPtr)->getValueId(),eqVid);
}
// no trigger temp tables
else {
normWARef.addVEG(((BaseColumn *)oldPtr)->getValueId(),eqVid);
}
}
}
else {
CMPASSERT(oldPtr->getOperatorType() == ITM_BASECOLUMN);
}
}
// ---------------------------------------------------------------------
// Prime the Group Attributes for the GenericUpdate.
// ---------------------------------------------------------------------
primeGroupAttributes();
} // GenericUpdate::transformNode()
// -----------------------------------------------------------------------
// GenericUpdate::rewriteNode()
// -----------------------------------------------------------------------
void GenericUpdate::rewriteNode(NormWA & normWARef)
{
  // QSTUFF
  // ---------------------------------------------------------------------
  // Normalize every column of the target table and append the result to
  // the ColumnVEGList of the table descriptor. This step runs before the
  // index item expressions are rewritten further down.
  // ---------------------------------------------------------------------
  const ValueIdList &tableCols = getTableDesc()->getColumnList();
  for (CollIndex colIx = 0; colIx < tableCols.entries(); colIx++)
    {
      ItemExpr *normalizedCol =
        tableCols[colIx].getItemExpr()->normalizeNode(normWARef);
      getTableDesc()->addToColumnVEGList(normalizedCol->getValueId());
    }

  // ---------------------------------------------------------------------
  // Normalize the indexes of the target table.
  // ---------------------------------------------------------------------
  for (CollIndex indexIx = 0;
       indexIx < (Int32)getTableDesc()->getIndexes().entries();
       indexIx++)
    {
      IndexDesc *indexDesc = getTableDesc()->getIndexes()[indexIx];

      // Work on a copy of the asc/desc key order, normalize the copy and
      // store it back into the index descriptor.
      ValueIdList keyOrder(indexDesc->getOrderOfKeyValues());
      keyOrder.normalizeNode(normWARef);
      indexDesc->setOrderOfKeyValues(keyOrder);

      // Partitioned indexes also carry partitioning keys to normalize.
      if (indexDesc->isPartitioned())
        indexDesc->getPartitioningFunction()->normalizePartitioningKeys(normWARef);
    }

  // The potential outputs are normalized here to avoid problems during
  // code generation.
  potentialOutputs_.normalizeNode(normWARef);

  // QSTUFF
  precondition_.normalizeNode(normWARef);

  // Not used by any later phase, so drop them instead of rewriting them.
  exprsInDerivedClasses_.clear();

  // ---------------------------------------------------------------------
  // Let the base class rewrite the selection predicates and the
  // Group Attributes.
  // ---------------------------------------------------------------------
  RelExpr::rewriteNode(normWARef);

  // ---------------------------------------------------------------------
  // Rewrite the new-record expressions and the executor predicates.
  // The return values of normalizeNode() are not needed here.
  // ---------------------------------------------------------------------
  newRecExpr().normalizeNode(normWARef);
  // QSTUFF
  newRecBeforeExpr().normalizeNode(normWARef);
  // QSTUFF
  executorPred().normalizeNode(normWARef);

  if (isMerge())
    {
      mergeInsertRecExpr().normalizeNode(normWARef);
      mergeUpdatePred().normalizeNode(normWARef);
    }

  // ---------------------------------------------------------------------
  // An insert additionally carries an order-by list that must be rewritten.
  // ---------------------------------------------------------------------
  if (getOperatorType() == REL_UNARY_INSERT)
    {
      Insert *insertNode = (Insert *)(this->castToRelExpr());
      insertNode->reqdOrder().normalizeNode(normWARef);
    }

  // QSTUFF
  // Note: the normalization of the table columns used to be done at this
  // point; it now happens at the top of this method, before the index item
  // expressions are rewritten.
  // QSTUFF

  // ---------------------------------------------------------------------
  // Rewrite values in the key expressions.
  // Only the right hand side of each "=" is normalized; the left side
  // should have been a different valueId from the one below.
  // ---------------------------------------------------------------------
  ValueIdList keyPreds = beginKeyPred();
  for (CollIndex keyIx = 0; keyIx < keyPreds.entries(); keyIx++)
    {
      ItemExpr *equalsExpr = keyPreds[keyIx].getValueDesc()->getItemExpr();
      ItemExpr *normalizedRhs = (*equalsExpr)[1]->normalizeNode(normWARef);
      equalsExpr->child(1) = normalizedRhs;
    }

  // ---------------------------------------------------------------------
  // Rewrite the ValueIdMap between the select and the update part so
  // it has VEGReferences in it (note that we avoided VEGies that span
  // both the select and the update part; this is (probably?) one
  // reason why only one half of the key preds was normalized above).
  // ---------------------------------------------------------------------
  updateToSelectMap_.normalizeNode(normWARef);

  // ---------------------------------------------------------------------
  // Rewrite values in the check constraint expressions.
  // The flags indicate that a complex scalar expression is being
  // processed, which suppresses the performance of transitive closure.
  // ---------------------------------------------------------------------
  normWARef.setComplexScalarExprFlag();
  normWARef.setInConstraintsFlag();
  checkConstraints().normalizeNode(normWARef);
  normWARef.restoreComplexScalarExprFlag();
  normWARef.restoreInConstraintsFlag();

  // ---------------------------------------------------------------------
  // Rewrite the expressions in the TriggerBindInfo object, which is part
  // of the inlining info, if there is one.
  // ---------------------------------------------------------------------
  if (getInliningInfo().getTriggerBindInfo())
    getInliningInfo().getTriggerBindInfo()->normalizeMembers(normWARef);

} // GenericUpdate::rewriteNode()
// -----------------------------------------------------------------------
// GenericUpdate::recomputeOuterReferences()
// -----------------------------------------------------------------------
void GenericUpdate::recomputeOuterReferences()
{
// Should replace with appropriate virtual methods
// Solution 10-040114-2405 start
// Our transformation for input rowsets always involves a unpack and a
// flow operator. Hence we shouldnt be accessing any input rowset
// directly. Remove it's reference from the required inputs
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
ValueId vid;
ValueIdSet inRowsets;
ItemExpr *ie ;
for (vid = outerRefs.init(); outerRefs.next(vid); outerRefs.advance(vid))
{
ie = vid.getItemExpr();
if (ie->getOperatorType() != ITM_CONSTANT) {
if ((vid.getType().getTypeQualifier() == NA_ROWSET_TYPE) ||
(( ie->getOperatorType() == ITM_DYN_PARAM) &&
(((DynamicParam *) ie)->getRowsetSize() != 0)))
inRowsets +=vid;
}
}
// Remove input rowset references
outerRefs -=inRowsets;
// Solution 10-040114-2405 end
if ((getOperatorType() != REL_UNARY_INSERT) && (getOperatorType() != REL_LEAF_INSERT) &&
(getOperatorType() != REL_UNARY_DELETE) && (getOperatorType() != REL_LEAF_DELETE) &&
(getOperatorType() != REL_UNARY_UPDATE) && (getOperatorType() != REL_LEAF_UPDATE)) {
getGroupAttr()->setCharacteristicInputs(outerRefs);
return;
}
// ---------------------------------------------------------------------
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
// ---------------------------------------------------------------------
ValueIdSet allMyExpr(newRecExpr());
allMyExpr += newRecBeforeExpr();
allMyExpr += executorPred();
allMyExpr += usedColumns();
allMyExpr += getSelectionPred();
allMyExpr += exprsInDerivedClasses_;
ValueIdSet beginKeyPredSet(beginKeyPred());
allMyExpr += beginKeyPredSet;
if (isMerge())
{
allMyExpr += mergeInsertRecExpr();
allMyExpr += mergeUpdatePred();
}
allMyExpr.weedOutUnreferenced(outerRefs);
// Add references needed by children, if any
Int32 arity = getArity();
for (Int32 i = 0; i < arity; i++)
{
outerRefs += child(i).getPtr()->getGroupAttr()->getCharacteristicInputs();
}
getGroupAttr()->setCharacteristicInputs(outerRefs);
} // GenericUpdate::recomputeOuterReferences
// -----------------------------------------------------------------------
// GenericUpdate::normalizeNode
// -----------------------------------------------------------------------
RelExpr * GenericUpdate::normalizeNode(NormWA & normWARef)
{
  // Nothing to do if this node has been normalized already.
  if (nodeIsNormalized())
    return this;

  if (isMerge())
    normWARef.incrementMergeUpdDelCount();

  // Call the super class to do the normalization work.
  RelExpr *normalizedThis = RelExpr::normalizeNode(normWARef);

  // ---------------------------------------------------------------------
  // Row size check. It applies to general updates and to inserts (update
  // of a key column) that drive a trigger temp table insert or an
  // MV IUD log insert.
  // ---------------------------------------------------------------------
  const NABoolean isUpdateOrInsert =
    getOperator().match(REL_ANY_GEN_UPDATE) ||   // general update cases
    getOperator().match(REL_UNARY_INSERT);       // update of a key column

  if (isUpdateOrInsert &&
      (getInliningInfo().hasTriggers() ||        // driving trigger temp table insert
       getInliningInfo().isMVLoggingInlined()))  // driving MV IUD log insert
    {
      Lng32 outputRowLength =
        getGroupAttr()->getCharacteristicOutputs().getRowLength();

      // Two headers are subtracted: one for the record header and the
      // other for the message header.
      Lng32 messageSizeLimit =
        (ActiveSchemaDB()->getDefaults().getAsULong(LOCAL_MESSAGE_BUFFER_SIZE) * 1024) -
        (2*(ActiveSchemaDB()->getDefaults().getAsULong(DP2_MESSAGE_HEADER_SIZE_BYTES)));

      // The row must be smaller than the max executor message buffer
      // size; otherwise raise error 12070 and stop here.
      if (outputRowLength >= messageSizeLimit)
        {
          Lng32 recordLength = getTableDesc()->getNATable()->getRecordLength();
          NAString ansiTableName =
            getTableDesc()->getNATable()->getTableName().getQualifiedNameAsAnsiString();

          *CmpCommon::diags() << DgSqlCode(-12070)
                              << DgString0(ansiTableName)
                              << DgInt0(recordLength)
                              << DgInt1((Lng32)messageSizeLimit/2);
          return this;
        }
    }

  /// YYY
  // Unary updates ask their leftmost scan whether an update using an
  // index scan requires Halloween protection.
  if (getOperator().match(REL_ANY_UNARY_GEN_UPDATE))
    {
      Scan *leftmostScan = getLeftmostScanNode();
      if (leftmostScan &&
          leftmostScan->requiresHalloweenForUpdateUsingIndexScan())
        setAvoidHalloween(TRUE);
    }

  // If nobody asked for the merge IUD indicator in the characteristic
  // outputs, remove it (e.g. simple table without index maintenance).
  if (producedMergeIUDIndicator_ != NULL_VALUE_ID)
    {
      ValueId unusedMatch;
      const NABoolean indicatorIsRequested =
        getGroupAttr()->getCharacteristicOutputs().referencesTheGivenValue(
             producedMergeIUDIndicator_,
             unusedMatch);

      if (NOT indicatorIsRequested)
        producedMergeIUDIndicator_ = NULL_VALUE_ID;
    }

  return normalizedThis;
}
// -----------------------------------------------------------------------
// Insert::normalizeNode()
// The purpose of this method is to eliminate the Tuple node of an
// INSERT-VALUES statement. After normalization is done, the Tuple node
// becomes redundant, since all the information is inside the Insert node
// anyway.
// -----------------------------------------------------------------------
RelExpr * Insert::normalizeNode(NormWA & normWARef)
{
  // Already normalized: nothing left to do.
  if (nodeIsNormalized())
    return this;

  // The super class does the actual normalization work.
  RelExpr *normalizedThis = GenericUpdate::normalizeNode(normWARef);

  // A node that already is a LeafInsert needs no further treatment.
  if (normalizedThis->getOperatorType() == REL_LEAF_INSERT)
    return normalizedThis;

  // If there is an ORDER BY + a [first n], copy the ORDER BY ValueIds
  // down to the FirstN node so the rows are ordered before the first n
  // are taken. For ORDER BY + [any n] this is not done, as it is
  // sufficient and more efficient to sort the rows after taking just n
  // of them.
  // Note: this happens at normalize time instead of bind time because,
  // with complex expressions in the ORDER BY, the binder would get
  // different ValueIds for the non-leaf nodes, which screws up coverage
  // tests. At this point the ValueIds have already been uniquely computed.
  if ((reqdOrder().entries() > 0) &&
      (child(0)->getOperatorType() == REL_FIRST_N))
    {
      FirstN *firstNChild = (FirstN *)child(0)->castToRelExpr();
      // that is, [first n], not [any n] or [last n]
      if (firstNChild->isFirstN())
        firstNChild->reqdOrder().insert(reqdOrder());
    }

  CMPASSERT(normalizedThis->getArity() > 0);

  // Only a Tuple child without selection predicates is eliminated. The
  // node then starts a new (although shortlived) life as a LeafInsert
  // node; the optimizer will next transform it to a DP2Insert node.
  //
  // A Tuple child that does carry selection predicates is the case of an
  // insert into an ON STATEMENT MV: such an insert is inlined with an
  // update to an ON STATEMENT MV source table, and the child is kept.
  // Any other kind of child is left alone as well.
  if ((normalizedThis->child(0)->getOperatorType() == REL_TUPLE) &&
      normalizedThis->child(0)->getSelectionPred().isEmpty())
    {
      normalizedThis->child(0) = (RelExpr *)NULL;
      normalizedThis->setOperatorType(REL_LEAF_INSERT);
    }

  return normalizedThis;
}
// ***********************************************************************
// $$$$ RelRoot
// member functions for class RelRoot
// ***********************************************************************
// -----------------------------------------------------------------------
// ***NOTE*** These methods must be called AFTER the transformation phase
// or they will not return the correct answer.
//
// A sql statement cursor is updatable if all of the following are true:
// -- it is a SELECT statement
// -- there is only one underlying table, and no subquery references that tbl
// -- there are no aggregates present
// -- neither GROUP BY, DISTINCT, nor ORDER BY is specified
// -- all view columns must be column references
// -- no column reference can occur more than once
// -- The underlying table is not a materialized view
//
// A view is updatable similarly, except that
// -- ORDER BY *is* allowed (if it's allowed in a view at all)
//
// See Ansi 6.3 and 7.9 SR 12, and references to "read-only table".
// -----------------------------------------------------------------------
// Common updatability test shared by isUpdatableCursor() and
// isUpdatableView().
//
// isView       - TRUE when checking a view definition, FALSE when checking
//                a cursor. The select-list checks and the insertability
//                check are only performed for views.
// isInsertable - in/out. This method only ever sets it to FALSE (when a
//                column that is not selected by the view has neither a
//                default value nor is a system column); it never sets it
//                to TRUE, so the caller must initialize it.
//
// Returns TRUE if the query below this root passes all the checks below,
// FALSE otherwise. Must be called after binding and transformation.
NABoolean RelRoot::isUpdatableBasic(NABoolean isView,
                                    NABoolean &isInsertable) const
{
  CMPASSERT(nodeIsBound() && nodeIsTransformed());
  // ## Must ensure this still works when we have updatable Stored Procedures

  // QSTUFF
  Scan *scan = NULL;
  GenericUpdate *gu = 0;
  // QSTUFF

  // QSTUFF
  // For an embedded update/delete the child is the root of a generic
  // update subtree; the scan to examine is then the leftmost scan below it.
  if (child(0)->getGroupAttr()->isEmbeddedUpdateOrDelete() &&
      child(0)->getGroupAttr()->isGenericUpdateRoot())
    {
      gu = (GenericUpdate *) child(0)->castToRelExpr();
      if (gu->getOperator().match(REL_ANY_UNARY_GEN_UPDATE))
        scan = (Scan *)(child(0)->castToRelExpr())->getLeftmostScanNode();
      else
        return FALSE;
    }
  else
  // QSTUFF
    {
      scan = (Scan *)child(0)->castToRelExpr();
    }

  // getLeftmostScanNode() may come back without a scan (other callers in
  // this file test its result for NULL), so guard the dereference: without
  // a single underlying scan the query is not updatable.
  if (scan == NULL || scan->getOperatorType() != REL_SCAN)
    return FALSE;

  if (scan->accessOptions().accessType() == TransMode::READ_UNCOMMITTED_ACCESS_) // "read-only table"
    return FALSE;

  // Determine the effective isolation level. For views the
  // ISOLATION_LEVEL_FOR_UPDATES default is used when it is set.
  TransMode::IsolationLevel il;
  if ((NOT isView) ||
      (CmpCommon::getDefault(ISOLATION_LEVEL_FOR_UPDATES) == DF_NONE))
    ActiveSchemaDB()->getDefaults().getIsolationLevel
      (il);
  else
    ActiveSchemaDB()->getDefaults().getIsolationLevel
      (il,
       CmpCommon::getDefault(ISOLATION_LEVEL_FOR_UPDATES));

  // No explicit access type on the scan and a READ UNCOMMITTED isolation
  // level: not updatable.
  if (scan->accessOptions().accessType() == TransMode::ACCESS_TYPE_NOT_SPECIFIED_ &&
      il == TransMode::READ_UNCOMMITTED_)
    return FALSE;

  NATable *naTable = ActiveSchemaDB()->getNATableDB()->get(&
    scan->getTableDesc()->getNATable()->getExtendedQualName());
  CMPASSERT(naTable);

  // The table may only be referenced once; an embedded update/delete is
  // allowed a reference count of up to two (presumably the extra reference
  // is the generic update's own -- TODO confirm).
  // The braces make explicit that the "else" belongs to the
  // isEmbeddedUpdateOrDelete() test, not to the reference count test.
  if (naTable->getReferenceCount() > 1)
    {
      // QSTUFF
      if (getGroupAttr()->isEmbeddedUpdateOrDelete())
        {
          if (naTable->getReferenceCount() > 2)
            return FALSE;
        }
      else
        {
          // QSTUFF
          // A subquery references the scan tbl
          return FALSE;
        }
    }

  if (naTable->isAnMV())
    return FALSE; // A materialized view is not updatable. -- MV

  if (naTable->isPartitionNameSpecified())
    return FALSE; // If the PARTITION clause is specified in the view's query text
                  // then the view is not updatable.
                  // Check option can only check predicates in the
                  // where clause, the partition clause is like an extra predicate, in that it
                  // restricts the statements action to a single partition. But this extra predicate
                  // cannot be enforced by our current check option mechanism.
                  // Similarly if the PARTITION clause is specified
                  // in the query specification a cursor declaration then the cursor is not updatable.

  // For a view, every select-list entry must be a column reference and no
  // column may be referenced more than once.
  ValueIdSet selectCols;
  if (isView)
    {
      for (CollIndex i = 0; i < compExpr().entries(); i++)
        {
          ValueId idcol = compExpr()[i];
          const NAColumn *nacol = idcol.getNAColumn(TRUE/*okIfNotColumn*/);
          if (!nacol)                           // not a column reference
            return FALSE;

          // QSTUFF
          // in case of an embedded update within a view there may be an old and
          // a new column pointing to the same base table. We have to detect that
          // and prevent those views to be updatable.
          // This is done by mapping a column of the generic update's table
          // descriptor to the column at the same position in the scan's
          // table descriptor before the duplicate check below.
          if (getGroupAttr()->isEmbeddedUpdateOrDelete())
            {
              CMPASSERT(gu);
              for (CollIndex j = 0;
                   j < gu->getTableDesc()->getColumnList().entries();
                   j++)
                {
                  if ( gu->getTableDesc()->
                         getColumnList()[j].getItemExpr()->getValueId() == idcol)
                    {
                      idcol = scan->getTableDesc()->
                                getColumnList()[j].getItemExpr()->getValueId();
                    }
                }
            }
          // QSTUFF

          if (selectCols.contains(idcol))       // colref appears multiple times
            return FALSE;                       // (cf. errors 4017, 4022 in Binder)
          selectCols += idcol;

          // A system column is ok as long as user doesn't actually UPDATE or INSERT
          // it (by definition, the system supplies a default when not explicitly
          // named in INSERT)
          // if (nacol->isSystemColumn() &&     // cf. error 4013 in Binder
          //     nacol->getDefaultValue() == NULL)
          //   isInsertable = FALSE;
        }
    }

  // All columns not selected in the view must have a default value
  // for the view to be "insertable" (Tandem notion, not Ansi;
  // see SCMPBIDD for SQL/MP definition).
  // We don't care what default info a system column has;
  // by definition a system column is always filled in (defaulted).
  // Cf. error 4024 in Binder.
  if (isView)
    {
      const ValueIdList &allCols = scan->getTableDesc()->getColumnList();
      for (CollIndex i = 0; i < allCols.entries(); i++)
        {
          const ValueId idcol = allCols[i];
          const NAColumn *nacol = idcol.getNAColumn();
          if (!selectCols.contains(idcol) &&
              !nacol->getDefaultValue() &&
              !nacol->isSystemColumn())
            {
              // One such column is enough; the view stays updatable but
              // is not insertable.
              isInsertable = FALSE;
              break;
            }
        } // for allCols
    } // isView

  return TRUE;
}
NABoolean RelRoot::isUpdatableCursor()          // this is NOT const
{
  // The insertability result only matters for views; it is ignored here.
  NABoolean unusedInsertable;
  if (!isUpdatableBasic(FALSE, unusedInsertable))
    return FALSE;

  // Ansi 13.1 SR 5a -- no updatability clause specified, but ORDER BY was.
  // ##When INSENSITIVE and SCROLL are supported, this rule also applies
  if (!updatableSelect_ && reqdOrder().entries())
    return FALSE;                               // ORDER BY col-list

  // The following mods to the updatable-column list are only done if
  // we have to (for efficiency).
  if (!updateCol().entries() || reqdOrder().entries())
    {
      ValueIdSet updatableCols(updateCol());

      // "FOR UPDATE;" w/o col-list --
      // is equivalent to "FOR UPDATE OF all-cols", per Ansi 13.1 SR 5b + 13.
      // Every entry of the scan's column list that is a real column
      // becomes an updatable column.
      if (!updatableCols.entries())
        {
          const ColumnDescList &scanCols =
            *getScanNode()->getRETDesc()->getColumnList();
          for (CollIndex colIx = 0; colIx < scanCols.entries(); colIx++)
            {
              const ValueId colId = scanCols[colIx]->getValueId();
              if (colId.getNAColumn(TRUE/*okIfNotColumn*/))
                updatableCols += colId;
            }
        }

      // Genesis 10-990201-0094.  Ansi 17.18 SR 5.
      // Remove any ORDER BY cols from the FOR UPDATE OF cols,
      // then we let cli/Statement handle it (error CLI_INVALID_UPDATE_COLUMN).
      // ## We really should enhance StaticCompiler to catch these
      // ## syntax errors (and also CLI_NON_UPDATABLE_SELECT_CURSOR)
      // ## at compile-time not run-time.
      // ## This would require intersecting an updateWhereCurrentOf's
      // ## newRecExpr's target columns' NAColumns (or full-col-names)
      // ## with its
      // ## cursor's updateCols' NAColumns (or full-col-names),
      // ## via some extra lookup in cursor PLTs in StmtDeclStatCurs process().
      updatableCols -= ValueIdSet(reqdOrder());
      updateCol() = ValueIdList(updatableCols); // this is NOT const
    }

  // The cursor is updatable only if at least one updatable column is left.
  if (updateCol().entries())
    return TRUE;

  return FALSE;
}
NABoolean RelRoot::isUpdatableView(NABoolean &isInsertable) const
{
  // Start out optimistic; isUpdatableBasic() only ever lowers the flag.
  isInsertable = TRUE;

  if (isUpdatableBasic(TRUE, isInsertable))
    return TRUE;

  // A view that is not updatable is not insertable either.
  isInsertable = FALSE;
  return FALSE;
}
void RelRoot::transformNode(NormWA & normWARef,
ExprGroupId & locationOfPointerToMe)
{
CMPASSERT( this == locationOfPointerToMe );
if (nodeIsTransformed())
return;
markAsTransformed();
// QSTUFF
// in case of embedded updates or deletes we have to prevent outer
// predicates being pushed into subtrees being generated by a generic
// update. We achieve that by recording whether a node is at the root
// of a GenericUpdate subtree and preventing predicates being pushed
// beyond that node. By construction we know that those nodes are either
// anti semi-joins or unary updates.
// we prevent predicates from being pushed down by forcing them not to be
// covered by the coverTest method which in turn causes them not to be
// pushed down by pushdowncoveredExpressions.
// This works fine except for equality predicates (x.x = 10) which are usually
// veggyfied by the transformation pass. Since the compiler assumes that all those
// predicates have been pushed down to the leaves it will just forget about
// them at code generation time. To prevent that we disable generation of
// veggies for constant equality terms and rely on pushdown covered
// expression to do the right thing...which seems to work just fine.
if (getGroupAttr()->isEmbeddedUpdateOrDelete())
normWARef.setInEmbeddedUpdateOrDelete(TRUE);
// QSTUFF
// Embedded insert has the same equality predicate pushdown problems
// as embedded updates or deletes. Set the flag to prevent the pushdown.
if (getGroupAttr()->isEmbeddedInsert())
normWARef.setInEmbeddedInsert(TRUE);
// ---------------------------------------------------------------------
// Make a working copy of the NormWA for each (sub)query tree.
// ---------------------------------------------------------------------
NormWA newNormWA(normWARef);
// ---------------------------------------------------------------------
// Each Subquery represents its own region for the construction of
// VEGPredicates.
// ---------------------------------------------------------------------
if (isTrueRoot())
newNormWA.allocateAndSetVEGRegion(IMPORT_AND_EXPORT,this);
else
newNormWA.clearStateInformation(); // each subquery tree has its own state
// RelRoots predicates in the selectPred() are to be evaluated
// above the context of the RelRoot. Predicates from the child of
// the RelRoot are to stay there.
//
// Predicates are found at this stage in a RelRoot were pushed
// down by the parent to be transformed here. The parent was either
// a Rename node with or I'm the right child of a Semi or Outer Join.
//
// ---------------------------------------------------------------------
// Make values available to child
// ---------------------------------------------------------------------
child(0)->getGroupAttr()->addCharacteristicInputs
(getGroupAttr()->getCharacteristicInputs());
// ---------------------------------------------------------------------
// Transform the child
// ---------------------------------------------------------------------
child(0)->transformNode(newNormWA, child(0));
if ((isTrueRoot()) &&
(child(0)) &&
((child(0)->getOperatorType() == REL_SORT_LOGICAL) ||
((child(0)->getOperatorType() == REL_FIRST_N) &&
((child(0)->child(0)) &&
(child(0)->child(0)->getOperatorType() == REL_SORT_LOGICAL)))))
{
SortLogical * sl = NULL;
if (child(0)->getOperatorType() == REL_SORT_LOGICAL)
sl = (SortLogical*)child(0)->castToRelExpr();
else
sl = (SortLogical*)child(0)->child(0)->castToRelExpr();
if (NOT hasOrderBy())
{
// move order by sort key from SortLogical child to me.
reqdOrder() = sl->getSortKey();
}
}
// ---------------------------------------------------------------------
// Transform the computable expressions associated with me.
// If a subquery appears in the compute list, then let the subquery
// transformation cause a semijoin to be performed between the
// child of the RelRoot and the subquery.
// ---------------------------------------------------------------------
newNormWA.setInSelectList() ;
if (compExpr().transformNode(newNormWA, child(0),
getGroupAttr()->getCharacteristicInputs()))
{
// -----------------------------------------------------------------
// Transform my new child.
// -----------------------------------------------------------------
child(0)->transformNode(newNormWA, child(0));
}
newNormWA.restoreInSelectList() ;
// ---------------------------------------------------------------------
// Definitely no subqueries in the host variables, dynamic parameters
// and constant values.
// ---------------------------------------------------------------------
if (inputVars().transformNode(newNormWA, child(0),
getGroupAttr()->getCharacteristicInputs()))
{
ABORT("Internal error in RelRoot::transformNode - subquery in inputVars");
}
// ---------------------------------------------------------------------
// Definitely no subqueries in the order by list, at least until SQL MCLLXIV!
// ---------------------------------------------------------------------
if (reqdOrder().transformNode(newNormWA, child(0),
getGroupAttr()->getCharacteristicInputs()))
{
ABORT("Internal error in RelRoot::transformNode - subquery in reqdOrder");
}
pullUpPreds();
// transform the selection predicates
transformSelectPred(newNormWA, locationOfPointerToMe);
// We are currently assuming that no subqueries have been introduced above me;
// any new subquery parent would just silently be ignored!
CMPASSERT( this == locationOfPointerToMe ); // Genesis 10-970828-6025
normWARef.setCorrelatedSubqCount(newNormWA.getCorrelatedSubqCount());
normWARef.setContainsSemiJoinsToBeTransformed
(newNormWA.containsSemiJoinsToBeTransformed());
if (isTrueRoot())
{
// -----------------------------------------------------------------
// Sometimes a Left Join can be transformed to an Inner Join if
// there are binary comparison predicates that can filter out
// null augmented rows. In such a case, the VEGRegion created
// the Left Join needs to be merged into its parent VEGRegion.
// -----------------------------------------------------------------
normWARef.processVEGRegions();
// Restore the original VEGRegion.
newNormWA.restoreOriginalVEGRegion();
// if updatability of the cursor was not disabled explicitly
// by specifying a READ ONLY clause, then check to see if
// the cursor really is updatable. Retrieve child's pkeys,
// if the cursor is updatable.
if (updatableSelect() == TRUE
// QSTUFF
&&
// we allow simple views containing embedded deletes
// to be updated...but that does not translate into an
// updatable cusor
! child(0)->getGroupAttr()->isGenericUpdateRoot()
// QSTUFF
)
{
if (isUpdatableCursor())
{
updatableSelect() = TRUE;
// add child's clustering key columns to pkeyList.
// Convert nodes are added to convert the key value to
// the actual key type at runtime. The key value id gets
// replaced by a veg ref, so it is important that we 'remember'
// what the correct key type is and then convert to that type.
// This list is used to generate expression to compute a row
// of primary key values that will be returned to CLI so it
// could be passed in to an UPDATE...WHERE CURRENT OF... query.
// if child is a FirstN node, skip it.
Scan * scan = NULL;
if ((child(0)->castToRelExpr()->getOperatorType() == REL_FIRST_N) &&
(child(0)->child(0)))
scan = (Scan *)child(0)->child(0)->castToRelExpr();
else
scan = (Scan *)child(0)->castToRelExpr();
const ValueIdList * keyList =
&(scan->getTableDesc()->getClusteringIndex()->getIndexKey());
CollIndex i = 0;
for (i = 0; i < keyList->entries(); i++)
{
ItemExpr * castNode =
new(newNormWA.wHeap()) Cast((*keyList)[i].getItemExpr(),
&((*keyList)[i].getType()));
castNode->synthTypeAndValueId();
pkeyList().insert(castNode->getValueId());
}
ValueIdList nonKeyColList;
scan->getTableDesc()->getClusteringIndex()->getNonKeyColumnList(nonKeyColList);
for (i = 0; i < nonKeyColList.entries(); i++)
{
ItemExpr * castNode =
new(newNormWA.wHeap()) Cast(nonKeyColList[i].getItemExpr(),
&(nonKeyColList[i].getType()));
castNode->synthTypeAndValueId();
pkeyList().insert(castNode->getValueId());
}
} // updatable cursor select
else // nonupdatable cursor
{
updatableSelect() = FALSE;
if (updateColTree_)
{
// cursor has FOR UPDATE OF clause that can't be honored.
*CmpCommon::diags() << DgSqlCode(-4118);
locationOfPointerToMe = (RelExpr*)NULL;
}
}
}
else
updatableSelect() = FALSE;
}
else
{
// -----------------------------------------------------------------
// Modify the Group Attributes of my child so that it receives all
// the input values that I receive.
// Assign my selection predicates to the child.
// -----------------------------------------------------------------
child(0)->getGroupAttr()->addCharacteristicInputs
(getGroupAttr()->getCharacteristicInputs());
child(0)->selectionPred() += getSelectionPred();
// -- Triggers
child(0)->getInliningInfo().merge(&getInliningInfo());
locationOfPointerToMe = child(0); // my parent now -> my child
child(0)->setFirstNRows(getFirstNRows());
deleteInstance(); // Goodbye!
} // eliminate intermediate RelRoots
} // RelRoot::transformNode()
// -----------------------------------------------------------------------
// RelRoot::pullUpPreds()
// -----------------------------------------------------------------------
void RelRoot::pullUpPreds()
{
// A RelRoot never pulls up predicates from its children.
child(0)->recomputeOuterReferences();
} // RelRoot::pullUpPreds()
// -----------------------------------------------------------------------
// RelRoot::recomputeOuterReferences()
// -----------------------------------------------------------------------
void RelRoot::recomputeOuterReferences()
{
// ---------------------------------------------------------------------
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
// ---------------------------------------------------------------------
if (NOT getGroupAttr()->getCharacteristicInputs().isEmpty())
{
ValueIdSet leafValues, emptySet;
GroupAttributes emptyGA;
child(0)->getGroupAttr()->getCharacteristicInputs().
getLeafValuesForCoverTest(leafValues, emptyGA, emptySet);
CMPASSERT((getGroupAttr()->getCharacteristicInputs().contains
(child(0)->getGroupAttr()->getCharacteristicInputs())) ||
(getGroupAttr()->getCharacteristicInputs().contains (leafValues)));
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
// Remove from outerRefs those valueIds that are not needed
// by my selection predicate or by my computed expression list.
// Need to add the orderby list since it is not a subset of the
// computed expression list.
ValueIdSet allMyExpr(getSelectionPred());
allMyExpr.insertList(compExpr());
allMyExpr.insertList(reqdOrder());
allMyExpr.weedOutUnreferenced(outerRefs);
// Add to outerRefs those that my child need.
outerRefs += child(0).getPtr()->getGroupAttr()->getCharacteristicInputs();
// set my Character Inputs to this new minimal set.
getGroupAttr()->setCharacteristicInputs(outerRefs);
}
} // RelRoot::recomputeOuterReferences()
// -----------------------------------------------------------------------
// RelRoot::rewriteNode()
//
// May only be called on the true root of the query tree (asserted).
// Performs the generic RelExpr rewrite and afterwards normalizes the
// value expressions owned by the root itself: the computed expression
// list, the ORDER BY list and, for an updatable select, the primary
// key list.
// -----------------------------------------------------------------------
void RelRoot::rewriteNode(NormWA & normWARef)
{
  CMPASSERT(isTrueRoot());

  // The original external inputs have to be made available by someone,
  // and that someone is the top root. Remember them before the rewrite.
  const ValueIdSet savedExternalInputs(
       getGroupAttr()->getCharacteristicInputs());

  // Rewrite the value expressions using the VEG expressions that were
  // created when the transitive closure of "=" predicates was computed,
  // and transform a Left Join to an Inner Join whenever possible.
  RelExpr::rewriteNode(normWARef);

  // Put the original external inputs back into the characteristic inputs.
  getGroupAttr()->addCharacteristicInputs(savedExternalInputs);

  // Rewrite the root's own expression lists. The value returned by
  // normalizeNode() is not needed here and is deliberately discarded.
  (void) compExpr().normalizeNode(normWARef);    // computable expressions
  (void) reqdOrder().normalizeNode(normWARef);   // sort key list

  // The pkey list is only rewritten for an updatable select.
  if (updatableSelect() == TRUE)
    (void) pkeyList().normalizeNode(normWARef);
} // RelRoot::rewriteNode()
// -----------------------------------------------------------------------
// RelRoot::normalizeNode()
//
// Normalizes the query tree below the true root (asserted). A node that
// is already marked as normalized is returned unchanged. Otherwise the
// method, in this order:
//   - locates the VEGRegion of the root,
//   - calls rewriteNode() to rewrite the tree in VEG notation,
//   - pushes covered expressions down to the child,
//   - makes sure the child receives the inputs needed by the ORDER BY,
//   - copies the ORDER BY list to a [first n] child, if there is one,
//   - normalizes the child and restores the original VEGRegion,
//   - fixes the essential outputs and synthesizes logical properties.
//
// Returns NULL if the main SQLCODE in the diagnostics area is negative
// at the end of the method, "this" otherwise.
// -----------------------------------------------------------------------
RelExpr * RelRoot::normalizeNode(NormWA & normWARef)
{
// Normalize each node only once.
if (nodeIsNormalized())
return this;
markAsNormalized();
CMPASSERT(isTrueRoot());
// ---------------------------------------------------------------------
// Locate the VEGRegion for the root.
// ---------------------------------------------------------------------
normWARef.locateAndSetVEGRegion(this);
// ---------------------------------------------------------------------
// Rewrite value expressions in the query tree using the VEG notation.
// Convert Left Joins to Inner Joins, if possible.
// Note that this is an extra walk through the query tree and is
// hidden in between the transformNode() and normalizeNode() phases.
// Its purpose is to perform a top-down, left-to-right tree walk in
// the transformed tree and initiate the rewrite on its way up.
// This will cause all of the values that are generated at the leaves
// to be normalized, i.e, rewritten in terms of the VEG notation,
// before expressions that reference them further up in the tree
// are normalized.
// ---------------------------------------------------------------------
rewriteNode(normWARef);
// ---------------------------------------------------------------------
// Check which expressions can be evaluated by my child.
// Modify the Group Attributes of those children who inherit some of
// these expressions.
// ---------------------------------------------------------------------
pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
getGroupAttr()->getCharacteristicInputs(),
selectionPred()
);
// Local copies of the ORDER BY list and of the root's inputs, taken
// after the pushdown above.
ValueIdList orderByList = reqdOrder();
ValueIdSet myCharInput = getGroupAttr()->getCharacteristicInputs();
// This was added to fix the problem exposed by the case 10-010321-1842
// Compiler failed to create a plan when query had sort order req. by
// column number which is expression containing dynamic parameter and
// covered by another column in RelRoot requiredOutput like
// SELECT a,a/(?p) FROM t ORDER BY 2; For this case we need to
// enforce that Sort operator can sort on this expression by keeping
// parameter ?p in RelRoot child's group requiredInput. Previously,
// expression got removed from this group requiredOutput, the only
// reference to ?p was removed, and as a result ?p was not kept in
// this group requiredInput.
// NOTE. This solution will force the Sort operator to be done
// directly below the Root node.
if (orderByList.entries() > 0)
{
// Only inputsNeededForOrderBy is consumed after the cover test;
// coveredOrderBySet and coveredOrderBySubExpr merely receive results
// and uncoveredOrderByExpr is not used in this method.
ValueIdSet orderBySet(orderByList),
coveredOrderBySet,
inputsNeededForOrderBy,
coveredOrderBySubExpr,
uncoveredOrderByExpr;
GroupAttributes * childGAPtr = child(0).getPtr()->getGroupAttr();
childGAPtr->coverTest(orderBySet,
myCharInput,
coveredOrderBySet,
inputsNeededForOrderBy,
&coveredOrderBySubExpr);
childGAPtr->addCharacteristicInputs(inputsNeededForOrderBy);
}
// ---------------------------------------------------------------------
// If there is an ORDER BY + a [first n], copy the ORDER BY ValueIds
// down to the FirstN node so we order the rows before taking the first n.
// If it is ORDER BY + [any n] we don't do this, as it is sufficient
// and more efficient to sort the rows after taking just n of them.
// Note: We do this at normalize time instead of bind time because if
// there are complex expressions in the ORDER BY, the binder will get
// different ValueIds for the non-leaf nodes which screws up coverage
// tests. Doing it here the ValueIds have already been uniquely computed.
// ---------------------------------------------------------------------
if ((reqdOrder().entries() > 0) &&
(child(0)->getOperatorType() == REL_FIRST_N))
{
FirstN * firstn = (FirstN *)child(0)->castToRelExpr();
if (firstn->isFirstN()) // that is, [first n], not [any n] or [last n]
firstn->reqdOrder().insert(reqdOrder());
}
// ---------------------------------------------------------------------
// Normalize the child.
// ---------------------------------------------------------------------
child(0) = child(0)->normalizeNode(normWARef);
// ---------------------------------------------------------------------
// Restore the region before returning
// ---------------------------------------------------------------------
normWARef.restoreOriginalVEGRegion();
fixEssentialCharacteristicOutputs();
// Register the root as extra hub vertex if none has been recorded yet.
if (NOT normWARef.getExtraHubVertex())
normWARef.setExtraHubVertex(this);
// ---------------------------------------------------------------------
// Synthesize logical properties
// ---------------------------------------------------------------------
synthLogProp(&normWARef);
normWARef.setMergeUpdDelCount(0);
// check for any errors that occurred during normalization
if (CmpCommon::diags()->mainSQLCODE() < 0)
return NULL;
else
return this;
} // RelRoot::normalizeNode()
// -----------------------------------------------------------------------
// RelRoot::semanticQueryOptimizeNode()
//
// Drives the semantic query optimization (SQO) phase for the tree below
// this root. A node already marked as semantic-query-optimized is
// returned unchanged. When the NormWA reports that SQO is required, the
// child tree is copied first and then optimized in one or more passes.
// If an AssertException is raised during those passes, the changes
// recorded in the SQO work area are undone, warning 2078 is added to
// the diagnostics area and the copied tree replaces the child. When SQO
// is not required, only a recursive pushdown of covered expressions is
// done, and only if the NormWA asks for it. Always returns "this".
// -----------------------------------------------------------------------
RelExpr * RelRoot::semanticQueryOptimizeNode(NormWA & normWARef)
{
// Optimize each node only once.
if (nodeIsSemanticQueryOptimized())
return this;
markAsSemanticQueryOptimized() ;
// semanticQueryOptimize(SQO) is undertaken only if
// (a) there are subqueries that can be unnested OR
// (b) semijoins that can be transformed to inner joins OR
// (c) joins that can be eliminated OR
// (d) joins that can be extra hub
if (normWARef.requiresSemanticQueryOptimization() )
{
// make a copy of the current query tree. If there is an exception
// during the SQO phase we can proceed with the copy.
// SQO can provide improved performance but is not needed for
// correctness.
RelExpr *copyTree = child(0)->
copyRelExprTree(CmpCommon::statementHeap());
Lng32 numSQOPasses = 0;
Lng32 multiPassJoinElimLimit =
ActiveSchemaDB()->getDefaults().getAsLong(MULTI_PASS_JOIN_ELIM_LIMIT);
try
{
// The first pass is unconditional. Further passes are made while
// the pass count is below MULTI_PASS_JOIN_ELIM_LIMIT (a negative
// limit means the count is not limited) and the NormWA still
// reports joins to be eliminated or extra-hub tables to check.
while ((numSQOPasses == 0) ||
(((numSQOPasses < multiPassJoinElimLimit) ||
(multiPassJoinElimLimit < 0)) &&
(normWARef.containsJoinsToBeEliminated() ||
normWARef.checkForExtraHubTables())))
{
normWARef.locateAndSetVEGRegion(this);
normWARef.setCheckForExtraHubTables(FALSE);
// ---------------------------------------------------------------------
// Semantic Query Optimize the child.
// ---------------------------------------------------------------------
child(0) = child(0)->semanticQueryOptimizeNode(normWARef);
// Put the temp table inserts for common subexpressions, if any,
// between this root and its child.
child(0) = inlineTempTablesForCSEs(normWARef);
normWARef.restoreOriginalVEGRegion();
// Reset the per-pass state before pushing down covered expressions.
normWARef.setExtraHubVertex(NULL);
normWARef.setContainsJoinsToBeEliminated(FALSE);
recursivePushDownCoveredExpr(&normWARef);
numSQOPasses++ ;
}
}
catch(AssertException & e)
{
// Undo any common expression changes done during Unnesting so that
// we can start over.
normWARef.getSqoWA()->undoChanges(normWARef);
// Report the failed assertion (condition, file and line) with a
// positive SQLCODE and continue with the tree copied above.
*CmpCommon::diags() << DgSqlCode(2078)
<< DgString0(e.getCondition())
<< DgString1(e.getFileName())
<< DgInt0((Lng32)e.getLineNum());
child(0) = copyTree ;
if (normWARef.requiresRecursivePushdown())
{
recursivePushDownCoveredExpr(&normWARef,
FALSE // no need to do any synthLogProp
);
}
}
}
else if (normWARef.requiresRecursivePushdown())
{
recursivePushDownCoveredExpr(&normWARef,
FALSE // no need to do any synthLogProp
);
}
// for debugging
if (normWARef.getCommonSubExprRefCount() > 0 &&
CmpCommon::getDefault(CSE_PRINT_DEBUG_INFO) == DF_ON)
CommonSubExprRef::displayAll();
return this;
} // RelRoot::semanticQueryOptimizeNode()
// -----------------------------------------------------------------------
// RelRoot::inlineTempTablesForCSEs()
//
// Builds the tree that materializes common subexpressions (CSEs) into
// temp tables ahead of the actual query. The CSEInfo list of the current
// statement is scanned for entries that have an "insert into temp"
// tree. Those trees are combined with unions, level by level, such that
// a CSE is only processed after all the child CSEs it depends on. The
// result is unioned with the current child of the root and a
// MapValueIds node is placed on top so that the root continues to see
// the ValueIds of its original child.
//
// Returns the new MapValueIds node, or the unchanged child(0) when no
// CSE needs to be materialized. The caller is responsible for storing
// the returned tree as the new child of the root.
// -----------------------------------------------------------------------
RelExpr * RelRoot::inlineTempTablesForCSEs(NormWA & normWARef)
{
// Union tree of all temp table inserts; stays NULL if there are none.
RelExpr *result = NULL;
const LIST(CSEInfo *) * cses = CmpCommon::statement()->getCSEInfoList();
if (cses && cses->entries() > 0)
{
// If this query tree has any common subexpressions that need
// to be materialized as temp tables, then insert these
// materialization steps (called CTi below) between the root
// and its child node, Q, like this:
//
//   before:  Root
//             |
//             Q
//
//   after:   Root
//             |
//            MapValueIds
//             |
//            BlockedUnion
//             +-- Union              (left child)
//             |    +-- ...
//             |    |    +-- Union
//             |    |         +-- CT1
//             |    |         +-- CT2
//             |    +-- CTn
//             +-- Q                  (right child)
//
// (The tree is drawn as an outline on purpose: a backslash at the
// very end of a "//" comment line would splice the following source
// line into the comment.)
//
// The common subexpressions may depend on each other, so make
// sure to create them in the right order and to use blocked
// union instead of a regular union if there are such
// dependencies.
NABitVector toDoVec; // still to be done
NABitVector readyVec; // ready, all predecessors are done
NABitVector doneVec; // already done
// first, figure out all the CSEs that we have to process
for (CollIndex i=0; i<cses->entries(); i++)
if (cses->at(i)->getInsertIntoTemp() != NULL)
toDoVec += i;
// Loop over the to-do list, finding new entries for which we
// already processed all of their predecessors. In this context,
// the children are the predecessors, since we have to build the
// graph bottom-up. In other words, find a topological reverse
// order of the lexical graph of the CSEs.
while (toDoVec.entries() > 0)
{
// union of the inserts that become ready in this iteration
RelExpr *thisLevelOfInserts = NULL;
for (CollIndex c=0; toDoVec.nextUsed(c); c++)
{
CSEInfo *info = cses->at(c);
// predecessor (child) CSEs that have to be computed before we
// can attempt to compute this one
const LIST(CountedCSEInfo) &predecessors(info->getChildCSEs());
NABoolean isReady = TRUE;
for (CollIndex p=0; p<predecessors.entries(); p++)
{
Int32 cseId = predecessors[p].getInfo()->getCSEId();
// the CSE id is used as index into the CSEInfo list
CMPASSERT(cses->at(cseId)->getCSEId() == cseId);
if (!doneVec.contains(cseId) &&
cses->at(cseId)->getInsertIntoTemp() != NULL)
// a predecessor CSE for which we have to
// materialize a temp table has not yet
// been processed - can't do this one
isReady = FALSE;
}
if (isReady)
{
// no predecessors or all predecessors have been
// done
readyVec += c;
}
}
// At this point we will have one or more CSEs in readyVec.
// All of their predecessors (if any) have already been
// processed. Now make a Union backbone to process all the
// CSEs in readyVec in parallel.
// If we find nothing, we may have circular dependencies,
// and this is not allowed
// (recursive queries will have to be handled separately)
CMPASSERT(readyVec.entries() > 0);
for (CollIndex r=0; readyVec.nextUsed(r); r++)
{
CSEInfo *info = cses->at(r);
if (thisLevelOfInserts == NULL)
thisLevelOfInserts = info->getInsertIntoTemp();
else
{
// inserts of the same level do not depend on each other
// NOTE(review): the FALSE argument presumably requests a
// regular (non-blocked) union - confirm against makeUnion()
thisLevelOfInserts = CommonSubExprRef::makeUnion(
thisLevelOfInserts,
info->getInsertIntoTemp(),
FALSE);
}
} // loop over ready list
if (result == NULL)
result = thisLevelOfInserts;
else
// earlier levels go to the left of this level
// NOTE(review): the TRUE argument presumably requests a blocked
// union - confirm against makeUnion()
result = CommonSubExprRef::makeUnion(
result,
thisLevelOfInserts,
TRUE);
// move the CSEs handled in this iteration from "to do" to "done"
toDoVec -= readyVec;
doneVec += readyVec;
readyVec.clear();
} // while loop over to-do-list
} // CSEs exist for this statement
if (result)
{
const ValueIdSet &childOutputs(
child(0).getGroupAttr()->getCharacteristicOutputs());
// Parallel lists: entry i of outputValueList is an output of the
// original child, entry i of unionValueList is the ValueIdUnion
// created for it below.
ValueIdList outputValueList;
ValueIdList unionValueList;
// make a final blocked union between the inlined
// insert statements and the actual query
Union *topUnion = CommonSubExprRef::makeUnion(
result,
child(0),
TRUE);
// This top-level union has a right child that produces the
// desired outputs. The left child produces fake dummy ValueIds,
// it doesn't produce any rows. Since the root expects the right
// child's ValueIds, we put a MapValueIds on top that maps the
// values back to what they were in the right child.
for (ValueId o=childOutputs.init();
childOutputs.next(o);
childOutputs.advance(o))
{
// dummy left-side value with a copy of the type of the real output
ItemExpr *leftFake = new(CmpCommon::statementHeap())
NATypeToItem(o.getType().newCopy(CmpCommon::statementHeap()));
leftFake->synthTypeAndValueId();
ValueIdUnion *vidUnion = new(CmpCommon::statementHeap())
ValueIdUnion(leftFake->getValueId(),
o,
NULL_VALUE_ID,
topUnion->getUnionFlags());
vidUnion->synthTypeAndValueId();
topUnion->addValueIdUnion(vidUnion->getValueId(),
CmpCommon::statementHeap());
outputValueList.insert(o);
unionValueList.insert(vidUnion->getValueId());
topUnion->getGroupAttr()->addCharacteristicOutput(
vidUnion->getValueId());
}
result = new(CmpCommon::statementHeap())
MapValueIds(topUnion,
ValueIdMap(outputValueList, unionValueList),
CmpCommon::statementHeap());
// The new MapValueIds node gets fresh group attributes: the inputs
// of the top union and the outputs of the original child.
result->setGroupAttr(new (CmpCommon::statementHeap()) GroupAttributes());
result->getGroupAttr()->addCharacteristicInputs(
topUnion->getGroupAttr()->getCharacteristicInputs());
result->getGroupAttr()->setCharacteristicOutputs(childOutputs);
result->synthLogProp(&normWARef);
}
else
// no change, return child pointer
result = child(0);
return result;
}
// -----------------------------------------------------------------------
// Filter::normalizeNode()
//
// Unlike the base class implementation, selection predicates that
// contain outer references are not pushed down to the child; they are
// retained in this Filter node. All other selection predicates are
// expected to be pushed down completely (asserted). Returns "this".
// -----------------------------------------------------------------------
RelExpr * Filter::normalizeNode(NormWA & normWARef)
{
  if (nodeIsNormalized())
    return this;
  markAsNormalized();

  // Inputs offered to the pushdown: all characteristic inputs except
  // the outer references.
  ValueIdSet outerRefs;
  ValueIdSet inputsForPushdown(getGroupAttr()->getCharacteristicInputs());
  inputsForPushdown.getOuterReferences(outerRefs);
  inputsForPushdown -= outerRefs;

  // Start with all selection predicates and take out those that
  // reference an outer reference. The values those predicates need
  // are requested from the child on behalf of this node.
  ValueIdSet predsToPushDown(selectionPred());
  ValueIdSet correlatedPreds;
  ValueIdSet valuesForCorrelatedPreds;
  if (selectionPred().getReferencedPredicates(outerRefs, correlatedPreds))
    {
      predsToPushDown -= correlatedPreds;
      computeValuesReqdForPredicates(correlatedPreds,
                                     valuesForCorrelatedPreds);
    }

  pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
                      inputsForPushdown,
                      predsToPushDown,
                      &valuesForCorrelatedPreds);

  // Nothing that was eligible for pushdown may be left behind.
  CMPASSERT( predsToPushDown.isEmpty() );

  child(0) = child(0)->normalizeNode(normWARef);
  fixEssentialCharacteristicOutputs();

  return this;
} // Filter::normalizeNode()
// -----------------------------------------------------------------------
// SortLogical::normalizeNode()
//
// Runs the base class normalization and then removes this node from
// the tree by returning its child in its place. A node that is already
// marked as normalized is returned as is.
// -----------------------------------------------------------------------
RelExpr * SortLogical::normalizeNode(NormWA & normWARef)
{
  if (NOT nodeIsNormalized())
    {
      RelExpr::normalizeNode(normWARef);

      // This node is no longer needed; hand back the child instead.
      return child(0);
    }

  return this;
} // SortLogical::normalizeNode()
// Returns TRUE if this is a unary node whose child is a Filter, or
// whose child is itself a unary node with a Filter as its child.
// Returns FALSE in all other cases.
NABoolean RelExpr::hasFilterChild()
{
  // Only unary nodes qualify.
  if (getArity() != 1)
    return FALSE;

  // Filter directly below this node.
  if (child(0)->getOperatorType() == REL_FILTER)
    return TRUE;

  // Filter one level further down, below a unary child.
  if (child(0)->getArity() == 1 &&
      child(0)->child(0)->getOperatorType() == REL_FILTER)
    return TRUE;

  return FALSE;
}
// If subquery unnesting fails for some reason at a particular level
// then the Filter node at that level can be elimated by pushing
// its selection predicate to its child. This is not strictly necessary
// as the optimizer has Rules to eliminate Filter nodes. But we do so
// since it helps with cardinality estimation after the SQO phase.
// The Filter nodes selection predicates are only pushed down to its child,
// aand not any further down the query tree.
void RelExpr::eliminateFilterChild()
{
if(child(0) &&
child(0)->getOperatorType() == REL_FILTER)
{
RelExpr* filterNode = child(0).getPtr() ;
filterNode->pushdownCoveredExpr(
filterNode->getGroupAttr()->getCharacteristicOutputs(),
filterNode->getGroupAttr()->getCharacteristicInputs(),
filterNode->selectionPred());
if (filterNode->selectionPred().isEmpty())
child(0) = filterNode->child(0) ;
else
{
// Pushdown failed to push the predicate for some reason.
// add it by hand and call pushdown again with an empty predicate
// to recompute the IO.
filterNode->child(0)->selectionPred() += filterNode->selectionPred();
filterNode->selectionPred().clear();
filterNode->pushdownCoveredExpr(
filterNode->getGroupAttr()->getCharacteristicOutputs(),
filterNode->getGroupAttr()->getCharacteristicInputs(),
filterNode->selectionPred());
child(0) = filterNode->child(0) ;
}
}
return ;
}
// Called at the end of the SQO phase to guarantee that all outputs are
// minimal. Prior to this call the SQO phase can have outputs that
// are not minimal for these three reasons:
// (a) unnesting for a subquery failed and predicates from the Filter
//     were pushed down only to its child
// (b) unnesting for a subquery failed because outputs from the left
//     child tree could not be promoted sufficiently;
//     getMoreOutputsIfPossible() can leave some nodes with more than
//     the minimal set of outputs in this case
// (c) the pullUpGroupBy transformation calls pushDownCoveredExpr only
//     up to the children of the join being transformed and not all the
//     way down.
//
// normWAPtr is dereferenced without a check when this node is a join.
// doSynthLogProp controls whether, for this node, synthLogProp() is
// called (joins only) and processCompRefOptConstraints() is invoked.
// NOTE(review): doSynthLogProp is not forwarded to the recursive calls
// on the children, so they use whatever default the declaration of
// this method provides - confirm that this is intended.
void RelExpr::recursivePushDownCoveredExpr(NormWA * normWAPtr,
                                           NABoolean doSynthLogProp)
{
  const Int32 numChildren = getArity();

  // The first join encountered that is not an extra hub is recorded
  // as the extra hub vertex, unless one has been recorded already.
  if (getOperator().match(REL_ANY_JOIN) &&
      (NOT normWAPtr->getExtraHubVertex()) &&
      !isExtraHub())
    normWAPtr->setExtraHubVertex(this);

  // Check which expressions can be evaluated by the children and
  // modify the Group Attributes of those children who inherit some of
  // these expressions.
  pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(),
                      getGroupAttr()->getCharacteristicInputs(),
                      selectionPred());

  // Make sure equiJoinPredicates_ gets updated in case
  // pushdownCoveredExpr() changed any of the join's predicates.
  if (getOperator().match(REL_ANY_JOIN) && doSynthLogProp)
    synthLogProp();

  // Repeat the pushdown for each child's subtree.
  for (Int32 c = 0; c < numChildren; c++)
    child(c)->recursivePushDownCoveredExpr(normWAPtr);

  if (doSynthLogProp)
    processCompRefOptConstraints(normWAPtr);
}
// Base class implementation: does nothing and ignores normWAPtr.
// Join, GroupByAgg and Filter define their own versions of this method
// in this file.
void RelExpr::processCompRefOptConstraints(NormWA * normWAPtr)
{
}
// Recursively prepares this subtree for being shared as a common
// subexpression (CSE). The children are visited first; the recursion
// stops at the first child that returns FALSE. If all children agree,
// this node
//   - removes "predicatesToRemove" from its selection predicates,
//   - adds those of "newPredicatesToAdd" that none of the children
//     took over, plus the VEG predicates taken over by every child,
//   - removes "inputsToRemove" from its characteristic inputs,
//   - and finally calls the virtual method prepareMeForCSESharing().
// Requested outputs that a child produces are added to the
// characteristic outputs of this node, regardless of the result.
//
// Returns FALSE if a child or prepareMeForCSESharing() returned FALSE.
NABoolean RelExpr::prepareTreeForCSESharing(
     const ValueIdSet &outputsToAdd,
     const ValueIdSet &predicatesToRemove,
     const ValueIdSet &newPredicatesToAdd,
     const ValueIdSet &inputsToRemove,
     ValueIdSet &valuesForVEGRewrite,
     ValueIdSet &keyColumns,
     CSEInfo *info)
{
  NABoolean ok = TRUE;
  const CollIndex numChildren = getArity();

  // New predicates that end up on this node, and the subset of new
  // VEG predicates that every child has taken over.
  ValueIdSet predsForThisNode(newPredicatesToAdd);
  ValueIdSet vegPredsOfAllChildren;

  predsForThisNode.findAllOpType(ITM_VEG_PREDICATE, vegPredsOfAllChildren);

  // recursively call this for the children
  for (CollIndex c = 0; c < numChildren && ok; c++)
    {
      ValueIdSet removeFromChild(predicatesToRemove);
      ValueIdSet addToChild(newPredicatesToAdd);

      // A child only gets those predicates that are covered by the
      // requested outputs and its own characteristic outputs and inputs.
      ValueIdSet valuesSeenByChild(outputsToAdd);

      valuesSeenByChild += child(c).getGroupAttr()->getCharacteristicOutputs();
      valuesSeenByChild += child(c).getGroupAttr()->getCharacteristicInputs();
      removeFromChild.removeUnCoveredExprs(valuesSeenByChild);
      addToChild.removeUnCoveredExprs(valuesSeenByChild);

      ok = child(c)->prepareTreeForCSESharing(outputsToAdd,
                                              removeFromChild,
                                              addToChild,
                                              inputsToRemove,
                                              valuesForVEGRewrite,
                                              keyColumns,
                                              info);

      // if the child already had or has added any of the requested
      // outputs, then add them to our own char. outputs
      ValueIdSet requestedOutputsOfChild(
           child(c).getGroupAttr()->getCharacteristicOutputs());

      requestedOutputsOfChild.intersectSet(outputsToAdd);
      getGroupAttr()->addCharacteristicOutputs(requestedOutputsOfChild);

      // Todo: CSE: consider using recursivePushDownCoveredExpr
      // instead of pushing these new predicates in this method
      vegPredsOfAllChildren.intersectSet(addToChild);
      predsForThisNode -= addToChild;
    }

  // One of the children said "no"; leave this node's predicates and
  // inputs alone.
  if (NOT ok)
    return ok;

  // Remove the predicates from our selection predicates.
  // Note that prepareMeForCSESharing() is supposed to remove
  // these predicates from all other places in the node.
  predicates_ -= predicatesToRemove;

  // Todo: CSE: need to remove predicates that are "similar" to
  // the ones requested, e.g. same columns and constants, but
  // an "=" operator with a different ValueId?

  // add any predicates that aren't covered by one of the children
  // and also add VEGPredicates that are covered by both of the
  // children
  predsForThisNode += vegPredsOfAllChildren;
  predicates_ += predsForThisNode;

  // Remove the char. inputs the caller asked to remove.
  // At this time we are not doing additional checks to
  // ensure these inputs aren't referenced anymore in
  // our node. We rely on the caller to ensure that
  // these extra inputs are only needed by the predicates
  // that we removed.
  getGroupAttr()->removeCharacteristicInputs(inputsToRemove);

  // Call a virtual method on this node to give it a chance to
  // remove the predicates from any other places where they might be
  // stored, and to add any outputs it produces locally. Also
  // give it a chance to say "no" to the whole idea of pulling out
  // predicates and changing char. inputs and outputs (the default
  // behavior).
  return prepareMeForCSESharing(outputsToAdd,
                                predicatesToRemove,
                                predsForThisNode,
                                inputsToRemove,
                                valuesForVEGRewrite,
                                keyColumns,
                                info);
}
// Note that the caller of this method is responsible for adding those
// new outputs to the group attributes that come from the children and
// for removing the requested inputs. The caller also removes
// "predicatesToRemove" from the selection predicates. An override of
// this method only needs to do the following:
//   - Add any new outputs to the char. outputs that are generated
//     directly by this node (not by its children)
//   - Add "newPredicatesToAdd" to any other places where predicates
//     are needed, and remove them from the selection predicates if
//     they should be stored elsewhere
//   - Remove "predicatesToRemove" from this node
//     (not from the children, that is done by the caller)
//   - Make sure that "inputsToRemove" isn't referenced anywhere else
//     in this node
//
// This base class version does none of that. It emits a CSE diagnostic
// through the first consumer of "info", naming the unsupported
// operator, and returns FALSE.
NABoolean RelExpr::prepareMeForCSESharing(
     const ValueIdSet &outputsToAdd,
     const ValueIdSet &predicatesToRemove,
     const ValueIdSet &newPredicatesToAdd,
     const ValueIdSet &inputsToRemove,
     ValueIdSet &valuesForVEGRewrite,
     ValueIdSet &keyColumns,
     CSEInfo *info)
{
  // A class derived from RelExpr must explicitly define
  // this method to support being part of a shared CSE.
  const NAString operatorText = getText();
  char diagnosticText[100];

  // snprintf() truncates the message if it does not fit.
  snprintf(diagnosticText,
           sizeof(diagnosticText),
           "Operator %s not supported",
           operatorText.data());

  info->getConsumer(0)->emitCSEDiagnostics(diagnosticText);

  return FALSE;
}
// Join version: unless ELIMINATE_REDUNDANT_JOINS is OFF, matches RI
// constraints for inner non-semi joins (when a NormWA is supplied) and
// adds the suitable constraints of the left and/or right child to the
// group attributes of this join, depending on the type of join.
void Join::processCompRefOptConstraints(NormWA * normWAPtr)
{
  if (CmpCommon::getDefault(ELIMINATE_REDUNDANT_JOINS) == DF_OFF)
    return;

  GroupAttributes &joinGA  = *getGroupAttr();
  GroupAttributes &outerGA = *child(0).getGroupAttr();
  GroupAttributes &innerGA = *child(1).getGroupAttr();
  const ValueIdSet &outerConstraints = outerGA.getConstraints();
  const ValueIdSet &innerConstraints = innerGA.getConstraints();

  if (normWAPtr && isInnerNonSemiJoin())
    matchRIConstraint(outerGA, innerGA, normWAPtr);

  // A Full Outer Join has a join predicate that affects the rows that
  // flow from the left, so the left constraints are skipped for it.
  if (NOT isFullOuterJoin())
    joinGA.addSuitableCompRefOptConstraints(outerConstraints,
                                            getSelectionPredicates(),
                                            this);

  // Only non-semi inner joins rely solely on the selection predicates
  // to control the rows from the right. Other joins use a join
  // predicate also.
  if (isInnerNonSemiJoin())
    joinGA.addSuitableCompRefOptConstraints(innerConstraints,
                                            getSelectionPredicates(),
                                            this);
}
// GroupByAgg version: unless ELIMINATE_REDUNDANT_JOINS is OFF, adds the
// suitable constraints of the child to the group attributes of this
// node, based on this node's selection predicates.
void GroupByAgg::processCompRefOptConstraints(NormWA * normWAPtr)
{
  if (CmpCommon::getDefault(ELIMINATE_REDUNDANT_JOINS) == DF_OFF)
    return;

  getGroupAttr()->addSuitableCompRefOptConstraints(
       child(0).getGroupAttr()->getConstraints(),
       getSelectionPredicates(),
       this);
}
// Filter version: unless ELIMINATE_REDUNDANT_JOINS is OFF, adds the
// suitable constraints of the child to the group attributes of this
// node, based on this node's selection predicates.
void Filter::processCompRefOptConstraints(NormWA * normWAPtr)
{
  if (CmpCommon::getDefault(ELIMINATE_REDUNDANT_JOINS) == DF_OFF)
    return;

  GroupAttributes &filterGA = *getGroupAttr();

  filterGA.addSuitableCompRefOptConstraints(
       child(0).getGroupAttr()->getConstraints(),
       getSelectionPredicates(),
       this);
}
// ***********************************************************************
// $$$$ Rename
// member functions for class Rename,
// used by sub-classes: RenameTable and RenameReference
// ***********************************************************************
// Removes this Rename node from the query tree: the parent's pointer is
// redirected to the child, the selection predicates and characteristic
// inputs of this node are handed down, and then the child is
// transformed. On return, locationOfPointerToMe refers to the child or
// to whatever replaced it during its transformation.
void Rename::transformNode(NormWA & normWARef,
                           ExprGroupId & locationOfPointerToMe)
{
  CMPASSERT( this == locationOfPointerToMe );

  // The rename table node has outlived its usefulness; remove it from
  // the tree.
  locationOfPointerToMe = child(0);

  // Move the predicates down to my child, OR my grandchild if the child
  // is a root. Move the characteristic inputs down to my child, AND my
  // grandchild if the child is a root node (the root must have at least
  // as many inputs as any of its children -- see the assertion in
  // RelRoot::recomputeOuterReferences).
  //
  // This moving past the root to the grandchild seems like it should be
  // unnecessary, that RelRoot::transformNode would do this for us
  // anyway. The problem is if this RenameTable is in the topmost
  // (outermost) scope and that scope has a predicate containing a
  // subquery -- e.g.
  //   select * from (select a from t1) x where a>(select b from t2);
  // -- without this "grandchild fix" the semijoin introduced by the
  // subquery was being placed above the topmost root (!) and that
  // entire subquery predicate was being lost.
  // This was Genesis case 10-970828-6025.
  const ValueIdSet &myInputs = getGroupAttr()->getCharacteristicInputs();
  RelExpr *receiver = child(0);

  receiver->getGroupAttr()->addCharacteristicInputs(myInputs);   // child

  if (receiver->getOperatorType() == REL_ROOT)
    receiver = receiver->child(0);                               // grandchild

  // From here on, "receiver" is the child or the grandchild.
  receiver->selectionPred() += getSelectionPred();
  receiver->getGroupAttr()->addCharacteristicInputs(myInputs);

  // transform my child
  locationOfPointerToMe->transformNode(normWARef, locationOfPointerToMe);

  // -- Triggers
  locationOfPointerToMe->getInliningInfo().merge(&getInliningInfo());

  // Verify that my child or whoever replaced it is now transformed
  CMPASSERT( locationOfPointerToMe->nodeIsTransformed());
} // Rename::transformNode()
//////////////////////////////////////////////////////////////////////////////
// BeforeTrigger::transformNode()
//
// Fixes the inputs of the tentative branch of the before triggers tree.
// After binding, the inputs for the temp insert side are the expressions
// that represent the NEW values. These expressions should not be inputs,
// but rather be calculated in the temp insert node itself using just basic
// columns as inputs. This method calculates the correct inputs based on
// the inputs from above (inputs of the TSJ node) and the values generated
// below the tentativeGU node (the OLD values).
//
// The node itself is removed from the tree first, by calling the
// transformNode() method of the superclass (Rename).
//////////////////////////////////////////////////////////////////////////////
void BeforeTrigger::transformNode(NormWA & normWARef,
                                  ExprGroupId & locationOfPointerToMe)
{
  // Call the inherited method to do the vanishing trick.
  Rename::transformNode(normWARef, locationOfPointerToMe);

  // Only the BeforeTrigger node that has a parent TSJ recorded (the
  // topmost one) needs to fix up the temp insert subtree.
  if (parentTSJ_ == NULL)
    return;

  // Find the interesting nodes we need: the TSJ, and on its right side
  // the root, the temp insert below it and the tuple below that.
  RelExpr *tsjNode    = parentTSJ_;
  RelExpr *rootNode   = tsjNode->child(1);
  RelExpr *tempInsert = rootNode->child(0);
  RelExpr *tupleNode  = tempInsert->child(0);

  // locationOfPointerToMe now points to the node below the tentative
  // node and before triggers. Its outputs are the values generated
  // by the subtree below the original GU (including sub-queries).
  const ValueIdSet& generatedValues =
    locationOfPointerToMe->getGroupAttr()->getCharacteristicOutputs();

  // The inputs of the TSJ node are the values needed from above:
  // transition variables and the executeId value.
  const ValueIdSet& externalInputs =
    tsjNode->getGroupAttr()->getCharacteristicInputs();

  // Together they are the set of basic input values the temp Insert
  // node needs to evaluate the NEW expressions.
  ValueIdSet minInputs;
  minInputs.insert(generatedValues);
  minInputs.insert(externalInputs);

  // The root node carries the maximal set of inputs, including all the
  // expressions. Narrowing that set with weedOutUnreferenced(minInputs)
  // was tried, but it also weeds out subqueries, so the root's inputs
  // are not consulted and the minimal set is installed directly.

  // Set the minimum inputs in all the nodes of the temp insert subtree.
  rootNode  ->getGroupAttr()->setCharacteristicInputs(minInputs);
  tempInsert->getGroupAttr()->setCharacteristicInputs(minInputs);
  tupleNode ->getGroupAttr()->setCharacteristicInputs(minInputs);
}
// ***********************************************************************
// $$$$ RelRoutine
// member functions for class RelRoutine
//
// The other intermediate classes derived from RelRoutine does not need
// their own recomputeOuterReferences() at this point
// That would be classes like
// TableValuedFunction
// BuiltinTableValuedFunction
// IsolatedNonTableUDR
// for example
// ***********************************************************************
// -----------------------------------------------------------------------
// RelRoutine::transformNode()
//
// Transforms the input parameter expressions of the routine, then the
// selection predicates, and finally primes the group attributes and
// marks the node as transformed.
// -----------------------------------------------------------------------
void RelRoutine::transformNode(NormWA &normWARef,
                               ExprGroupId & locationOfPointerToMe)
{
  // Each node is transformed only once.
  if (nodeIsTransformed())
    return;

  // ---------------------------------------------------------------------
  // Transform the computable expressions associated with me.
  // If a subquery appears in the compute list, then let the subquery
  // transformation cause a join to be performed between the node where
  // we found the reference to the UDF on the left and the UDF on the
  // right.
  //
  // Note that procInputParamsVids and procAllParamsVids may now diverge,
  // since procAllParamsVids is not transformed here. So we really should
  // not use procAllParamsVids any more!
  // ---------------------------------------------------------------------
  NABoolean treeWasChanged =
    getProcInputParamsVids().transformNode(
         normWARef,
         locationOfPointerToMe,
         getGroupAttr()->getCharacteristicInputs());

  if (treeWasChanged)
    {
      // -----------------------------------------------------------------
      // Transform my new child.
      // -----------------------------------------------------------------
      locationOfPointerToMe->transformNode(normWARef, locationOfPointerToMe);
    }

  // Make sure all the normal stuff is taken care of.
  // (The original note says this must be done before the inputs are
  // transformed, so that the Tuple child that a CallSP gets for
  // subquery inputs is dealt with.)
  //
  // A call with a subquery as an input parameter is transformed like this.
  //
  // At bind time the RelExpr tree for such a Call is:
  //
  //     CallSP( Tuple(Subq) )
  //
  // After transform it is:
  //
  //     CallSP( Join( Tuple, GrbyAgg( Scan ) ) )
  //
  // UDFs will not have the Tuple child; their subqueries and UDFs used
  // as inputs were transformed when we transformed the UDFunction
  // ItemExpr earlier.

  // transform the selection predicates
  transformSelectPred(normWARef, locationOfPointerToMe);

  primeGroupAttributes();
  markAsTransformed();
}
// -----------------------------------------------------------------------
// RelRoutine::recomputeOuterReferences()
// -----------------------------------------------------------------------
void RelRoutine::recomputeOuterReferences()
{
// ---------------------------------------------------------------------
// Delete all those input values that are not referenced
// by the input parameters.
// ---------------------------------------------------------------------
if (NOT getGroupAttr()->getCharacteristicInputs().isEmpty())
{
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
// Weed out those inputs not needed by my parameters or
// by my predicates
GroupAttributes emptyGA;
ValueIdSet leafExprSet, emptySet;
ValueIdSet exprSet(getProcInputParamsVids());
exprSet.getLeafValuesForCoverTest(leafExprSet, emptyGA, emptySet);
leafExprSet += getSelectionPred();
leafExprSet.weedOutUnreferenced(outerRefs);
getGroupAttr()->setCharacteristicInputs(outerRefs);
}
}
// -----------------------------------------------------------------------
// RelRoutine::rewriteNode()
//
// Normalizes the selection predicates and all parameter lists of the
// routine, rewrites child(0) if there is one, and finally normalizes the
// inputs and outputs in the group attributes.
//
// Parameters:
//   normWARef  IN : reference to the normalizer work area
// -----------------------------------------------------------------------
void RelRoutine::rewriteNode(NormWA &normWARef)
{
// ---------------------------------------------------------------------
// Make sure to rewrite all of our parameter inputs and predicates.
// ---------------------------------------------------------------------
selectionPred().normalizeNode(normWARef);
getProcInputParamsVids().normalizeNode(normWARef);
getProcOutputParamsVids().normalizeNode(normWARef);
getProcAllParamsVids().normalizeNode(normWARef);
// If a CallSP had a subquery or UDFs as an input parameter, it gets
// attached as child(0) at bind time, so we need to rewrite it too. This
// child gets moved by the optimizer - UdrToTSJFlow rule.
// If IsolatedScalarUDFs, on the other hand, contain subqueries or UDFs in
// their input parameters, we transform those the normal way at transform
// time.
if (child(0) != NULL)
child(0)->rewriteNode(normWARef);
// ---------------------------------------------------------------------
// Rewrite my own Group Attributes
// ---------------------------------------------------------------------
getGroupAttr()->normalizeInputsAndOutputs(normWARef);
}
// ***********************************************************************
// $$$$ Tuple
// member functions for class Tuple
// ***********************************************************************
// Tuple::transformNode()
// Separates the subquery / isolated UDFunction predicates from the tuple
// expression list, lets RelExpr::transformNode() do the generic work and
// then transforms the separated predicates, using the inputs the node had
// before the generic transformation.
//
// Parameters:
//   normWARef              IN : reference to the normalizer work area
//   locationOfPointerToMe  IN : reference to the location that holds the
//                               pointer to this node; it may refer to a
//                               different node on return
void Tuple::transformNode(NormWA & normWARef,
ExprGroupId &locationOfPointerToMe)
{
CMPASSERT( this == locationOfPointerToMe );
if (nodeIsTransformed())
return;
// Do NOT call markAsTransformed() here: RelExpr::transformNode() is
// called below.
ValueIdSet subqueryOrIsolatedUDFunctionPredicates;
// remove the subquery or Isolated UDFunction predicates from the
// tupleExpr() list
tupleExpr().removeSubqueryOrIsolatedUDFunctionPredicates(
subqueryOrIsolatedUDFunctionPredicates);
// -- Triggers
// The characteristic outputs are set to what is left in tupleExpr().
getGroupAttr()->setCharacteristicOutputs(tupleExpr());
// ---------------------------------------------------------------------
// Save the original inputs to use when the subquery predicates get
// transformed.
// ---------------------------------------------------------------------
ValueIdSet externalInputs = getGroupAttr()->getCharacteristicInputs();
// Let RelExpr:: do the work
RelExpr::transformNode(normWARef, locationOfPointerToMe);
// ---------------------------------------------------------------------
// Transform the subqueries or Isolated UDFunctions in the tupleExpr() list
// ---------------------------------------------------------------------
// semiJoins that are added should be added directly below my
// original parent
if (subqueryOrIsolatedUDFunctionPredicates.transformNode(normWARef,
locationOfPointerToMe,
externalInputs))
{
// The location now holds a new node; transform it as well.
locationOfPointerToMe->transformNode(normWARef,
locationOfPointerToMe);
// We are on our way back from a number of transformNode()s.
// Let's just make sure that the final usurper got transformed
CMPASSERT( locationOfPointerToMe->nodeIsTransformed());
}
} // Tuple::transformNode()
// -----------------------------------------------------------------------
// Tuple::recomputeOuterReferences()
// -----------------------------------------------------------------------
void Tuple::recomputeOuterReferences()
{
// ---------------------------------------------------------------------
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
// ---------------------------------------------------------------------
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
ValueIdSet allMyExpr(getSelectionPred());
allMyExpr.insertList(tupleExpr());
allMyExpr.weedOutUnreferenced(outerRefs);
getGroupAttr()->setCharacteristicInputs(outerRefs);
} // Tuple::recomputeOuterReferences()
// -----------------------------------------------------------------------
// Tuple::rewriteNode()
//
// Normalizes the tuple expressions, the selection predicates and the
// inputs/outputs of the group attributes, in that order. The results
// returned by the individual normalizeNode() calls are not used.
// -----------------------------------------------------------------------
void Tuple::rewriteNode(NormWA & normWARef)
{
  // Rewrite the tuple expressions.
  tupleExpr().normalizeNode(normWARef);

  // Rewrite the selection expressions.
  selectionPred().normalizeNode(normWARef);

  // Rewrite my own Group Attributes.
  getGroupAttr()->normalizeInputsAndOutputs(normWARef);
} // Tuple::rewriteNode()
// -----------------------------------------------------------------------
// Tuple::normalizeNode()
//
// Discards the selection predicates if this node rejects predicates,
// then delegates to RelExpr::normalizeNode() and returns its result.
// -----------------------------------------------------------------------
RelExpr * Tuple::normalizeNode(NormWA & normWARef)
{
  // -- Triggers
  // If predicates should not be pushed down here, delete them.
  if (rejectPredicates())
    {
      ValueIdSet &myPreds = selectionPred();

      if (!myPreds.isEmpty())
        myPreds.clear();
    }

  // Let RelExpr:: do the work
  RelExpr *normalized = RelExpr::normalizeNode(normWARef);
  return normalized;
}
// ***********************************************************************
// member functions for class TupleList
// ***********************************************************************
// TupleList::transformNode()
// A TupleList is transformed exactly like a Tuple; this simply forwards
// both arguments to Tuple::transformNode().
void TupleList::transformNode(NormWA & normWARef,
ExprGroupId &locationOfPointerToMe)
{
Tuple::transformNode(normWARef, locationOfPointerToMe);
} // TupleList::transformNode()
// -----------------------------------------------------------------------
// TupleList::recomputeOuterReferences()
// -----------------------------------------------------------------------
void TupleList::recomputeOuterReferences()
{
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
ValueIdSet allMyExpr(getSelectionPred());
ValueIdSet refExpr, emptySet;
GroupAttributes emptyGA;
allMyExpr.insertList(tupleExpr());
tupleExprTree()->getLeafValuesForCoverTest(refExpr, emptyGA, emptySet);
allMyExpr += refExpr;
allMyExpr.weedOutUnreferenced(outerRefs);
getGroupAttr()->setCharacteristicInputs(outerRefs);
} // TupleList::recomputeOuterReferences()
// -----------------------------------------------------------------------
// TupleList::rewriteNode()
//
// A TupleList is rewritten exactly like a Tuple; this simply forwards
// to Tuple::rewriteNode().
// -----------------------------------------------------------------------
void TupleList::rewriteNode(NormWA & normWARef)
{
Tuple::rewriteNode(normWARef);
} // TupleList::rewriteNode()
// ***********************************************************************
// Member functions for class Transpose
// ***********************************************************************
// Transpose::transformNode() -------------------------------------------
// Unconditional query transformations such as the transformation of
// a subquery to a semijoin are implemented by the virtual function
// transformNode(). The aim of such transformations is to bring the
// query tree to a canonical form. transformNode() also ensures
// that the "required" (or characteristic) input values are "minimal"
// and the "required" (or characteristic) output values are
// "maximal" for each operator.
//
// transformNode() is an overloaded name, which is used for a set
// of methods that implement the transformation phase of query
// normalization.
//
// We use the term query tree for a tree of relational operators,
// each of which can contain zero or more scalar expression trees.
// The transformations performed by transformNode() bring scalar
// expressions into a canonical form. The effect of most such
// transformations is local to the scalar expression tree.
// However, the transformation of a subquery requires a semijoin
// to be performed between the relational operator that contains
// the subquery and the query tree for the subquery. The effect
// of such a subquery transformation is therefore visible not
// only in the scalar expression tree but also in the relational
// expression tree.
//
// Parameters:
//
// NormWA & normWARef
// IN : a reference to the normalizer work area
//
// ExprGroupId & locationOfPointerToMe
// IN : a reference to the location that contains a pointer to
// the RelExpr that is currently being processed.
//
// This implementation is basically the same as RelExpr::transformNode,
// but here we need to transform each member of each ValueIdUnion of
// transUnionVals().
//
void Transpose::transformNode(NormWA &normWARef,
ExprGroupId &locationOfPointerToMe)
{
CMPASSERT( this == locationOfPointerToMe );
// If this node has already been transformed, we are done.
//
if (nodeIsTransformed())
return;
// Make sure that it is only transformed once.
//
markAsTransformed();
// transformNode takes a bound tree and turns it into a transformed
// tree. For a RelExpr that means the following.
// + expressions are transformed. If the expressions contain
// subqueries then new RelExprs are created for them and
// they are usually added above (as an ancestor of) the node
// that contained them.
// + predicates are pulled up from the children and their
// required inputs are modified
// + the required inputs of the node itself are changed
// from being a sufficient set to being a sufficient minimal
// set.
//
// Transform the child.
// Pull up their transformed predicates
// recompute their required inputs.
//
child(0)->transformNode(normWARef, child(0));
// The child has now been transformed.
// A new semiJoin may now be my direct descendant and my original
// child a descendant of it.
// In either case my child has now been transformed.
// Remember which node is my child at this point, so that the check
// after the loops can tell whether transforming the expressions
// changed child(0).
RelExpr *origChild = child(0); // my child before the expressions are transformed
// Transform each expression of each ValueIdUnion.
// (Do not transform the ValueIdUnion, but each of its members)
// The keyCol ValueIdUnion does not need to be transformed,
// so the loop index could start at 1.
//
for(CollIndex v = 0; v < transUnionVectorSize(); v++) {
ValueIdList &valIdList = transUnionVector()[v];
for(CollIndex i = 0; i < valIdList.entries(); i++) {
// Each entry of the list is treated as a ValueIdUnion.
ValueIdUnion *valIdu = ((ValueIdUnion *)valIdList[i].
getValueDesc()->getItemExpr());
CollIndex numEntries = valIdu->entries();
for(CollIndex j = 0; j < numEntries; j++) {
// original expression before transformation.
//
ItemExpr * iePtr = valIdu->getSource(j).getItemExpr();
// The transformed expression.
//
ExprValueId nePtr(iePtr);
// Transform the Item Expression. child(0) is passed as the place
// in the tree that the transformation may change (see the check
// against origChild below).
iePtr->transformNode(normWARef,
nePtr,
child(0),
getGroupAttr()->getCharacteristicInputs());
// If the original expression was transformed, update the entry
// in the ValueIdUnion
//
if (nePtr != (const ItemExpr *)iePtr) {
valIdu->setSource(j, nePtr->getValueId());
}
}
}
}
if(origChild != child(0)) {
// The transpose expressions were on a subquery that had not been
// processed before. Normalize the new tree that has become
// our child.
//
child(0)->transformNode(normWARef, child(0));
}
// Pull up the predicates and recompute the required inputs
// of whoever my children are now.
//
pullUpPreds();
// transform the selection predicates
//
transformSelectPred(normWARef, locationOfPointerToMe);
} // Transpose::transformNode()
// Transpose::rewriteNode() ---------------------------------------------
// rewriteNode() is the virtual function that computes
// the transitive closure for "=" predicates and rewrites value
// expressions.
//
// Parameters:
//
// NormWA & normWARef
//    IN : a reference to the normalizer work area
//
// This implementation is basically the same as RelExpr::rewriteNode()
// but here we need to normalize each member of each ValueIdUnion of
// transUnionVals().
//
void Transpose::rewriteNode(NormWA & normWARef)
{
  // Rewrite the expressions of the child node first.
  child(0)->rewriteNode(normWARef);

  // Normalize each member of each ValueIdUnion of transUnionVals().
  // (It may be possible to get away without normalizing the first
  // (key values) ValueIdUnion; if so, the outer index could start at 1.)
  for (CollIndex vecIx = 0; vecIx < transUnionVectorSize(); vecIx++)
    {
      ValueIdList &unionList = transUnionVector()[vecIx];

      for (CollIndex listIx = 0; listIx < unionList.entries(); listIx++)
        {
          ValueIdUnion *unionExpr =
            ((ValueIdUnion *)unionList[listIx].getValueDesc()->getItemExpr());

          CollIndex memberCount = unionExpr->entries();

          // Normalize each expression. This may generate new
          // ValueIds for the members of the ValueIdUnion.
          for (CollIndex memberIx = 0; memberIx < memberCount; memberIx++)
            {
              unionExpr->normalizeSpecificChild(normWARef, memberIx);
            }
        }
    }

  // Rewrite the expressions in the selection predicates.
  selectionPred().normalizeNode(normWARef);

  // ++MV
  getUniqueColumns().normalizeNode(normWARef);
  // --MV

  // Rewrite the expressions in the Group Attributes.
  getGroupAttr()->normalizeInputsAndOutputs(normWARef);
} // Transpose::rewriteNode()
// Transpose::recomputeOuterReferences() --------------------------------
// This method is used by the normalizer for recomputing the
// outer references (external dataflow input values) that are
// still referenced by each operator in the subquery tree
// after the predicate pull up is complete.
//
// Side Effects: sets the characteristicInputs of the groupAttr.
//
void Transpose::recomputeOuterReferences()
{
// This is virtual method on RelExpr.
// When this is called it is assumed that the children have already
// been transformed.
// The required inputs of the child are therefore already minimal
// and sufficient.
// It is also assumed that the RelExpr itself has been bound.
// That implies that the group attributes have already been allocated
// and the required inputs is a sufficient (but not necessarilly minimum)
// set of external values needed to evaluate all expressions in this subtree.
//
// Delete all those input values that are no longer referenced on
// this operator because the predicates that reference them have
// been pulled up.
//
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
// The set of valueIds need by this node.
//
ValueIdSet allMyExpr(getSelectionPred());
// Add the valueIds of each member of each ValueIdUnion of transUnionVals().
//
for(CollIndex v = 0; v < transUnionVectorSize(); v++) {
ValueIdList &valIdList = transUnionVector()[v];
for(CollIndex i = 0; i < valIdList.entries(); i++) {
ValueIdUnion *valIdu = ((ValueIdUnion *)valIdList[i].
getValueDesc()->getItemExpr());
CollIndex numEntries = valIdu->entries();
for(CollIndex j = 0; j < numEntries; j++) {
// Add the valueIds of each member.
//
allMyExpr += valIdu->getSource(j);
}
}
}
// Remove from outerRefs those valueIds that are not needed
// by all my expressions
//
allMyExpr.weedOutUnreferenced(outerRefs);
// Add to outerRefs those that my children need.
//
outerRefs += child(0).getPtr()->getGroupAttr()->getCharacteristicInputs();
// set my Character Inputs to this new minimal set.
//
getGroupAttr()->setCharacteristicInputs(outerRefs);
} // Transpose::recomputeOuterReferences()
// ***********************************************************************
// Member functions for class Pack
// ***********************************************************************
// -----------------------------------------------------------------------
// Pack::pullUpPreds() is refined to disallow the pullup of predicates
// from the operator's child, which may be made up of non-packed columns.
// The pack node packs all the columns it receives from its child, and
// predicates evaluated by the child could no longer be evaluated here on
// the packed columns.
// -----------------------------------------------------------------------
void Pack::pullUpPreds()
{
// ---------------------------------------------------------------------
// Simply don't pull up child's selection predicates. Still need to tell
// child to recompute its outer references due to the warning below.
// ---------------------------------------------------------------------
child(0)->recomputeOuterReferences();
// ---------------------------------------------------------------------
// WARNING: One rule that this procedure must follow is
// that recomputeOuterReferences() must be called on the children even
// if no predicates are pulled up from them. This is to correct
// the outer references that are added to a right child of a
// semi or outer join when processing subqueries in the ON clause.
// ---------------------------------------------------------------------
}
// -----------------------------------------------------------------------
// Pack::recomputeOuterReferences() adds the packing factor to be the
// additional outer references needed by the Pack node.
// -----------------------------------------------------------------------
void Pack::recomputeOuterReferences()
{
// Original set of outer references.
ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs();
// The set of valueIds need by the Pack operator.
ValueIdSet allMyExpr(getSelectionPred());
allMyExpr += packingFactor();
allMyExpr.insertList(packingExpr());
allMyExpr.insertList(requiredOrder());
// Remove from outerRefs those valueIds that are not needed by allMyExpr.
allMyExpr.weedOutUnreferenced(outerRefs);
// Add to outerRefs those that my children need.
outerRefs += child(0).getPtr()->getGroupAttr()->getCharacteristicInputs();
// Set my characteristic inputs to this new minimal set.
getGroupAttr()->setCharacteristicInputs(outerRefs);
}
// -----------------------------------------------------------------------
// Pack::transformNode() transforms the child, the required order and the
// packing expressions (which might contain a subquery), then handles the
// predicates.
//
// Parameters:
//   normWARef              IN : reference to the normalizer work area
//   locationOfPointerToMe  IN : reference to the location that holds the
//                               pointer to this node
// -----------------------------------------------------------------------
void Pack::transformNode(NormWA & normWARef,
ExprGroupId & locationOfPointerToMe)
{
CMPASSERT(this == locationOfPointerToMe);
// Transform this node only once.
if(nodeIsTransformed()) return;
markAsTransformed();
// Make inputs available to child
child(0)->getGroupAttr()->addCharacteristicInputs
(getGroupAttr()->getCharacteristicInputs());
// ---------------------------------------------------------------------
// Transform the child
// ---------------------------------------------------------------------
child(0)->transformNode(normWARef,child(0));
if(requiredOrder().
transformNode(normWARef,
child(0),
getGroupAttr()->getCharacteristicInputs())) {
// The requiredOrder list apparently had some subqueries that had
// not been processed before (is this possible?). Normalize the
// new tree that has become our child.
//
child(0)->transformNode(normWARef, child(0));
}
// ---------------------------------------------------------------------
// Transform the computable expressions associated with me.
// If a subquery appears in the compute list, then let the subquery
// transformation cause a semijoin to be performed between Pack and its
// child.
// ---------------------------------------------------------------------
if(packingExpr_.transformNode(normWARef,
child(0),
getGroupAttr()->getCharacteristicInputs()))
{
// -------------------------------------------------------------------
// Transform my new child.
// -------------------------------------------------------------------
child(0)->transformNode(normWARef,child(0));
}
// Pull up the predicates and recompute the required inputs
// of whoever my children are now.
pullUpPreds();
// transform the selection predicates
transformSelectPred(normWARef,locationOfPointerToMe);
}
// -----------------------------------------------------------------------
// Pack::rewriteNode() needs to rewrite the packing expressions as well
// as the selection predicates and the inputs/outputs.
//
// Parameters:
//   normWA  IN : reference to the normalizer work area
// -----------------------------------------------------------------------
void Pack::rewriteNode(NormWA& normWA)
{
// First rewrite the child node.
child(0)->rewriteNode(normWA);
// Rewrite the Pack node's own expressions (packing factor, packing
// expressions, selection predicates, required order) and then its
// inputs/outputs.
packingFactor().normalizeNode(normWA);
packingExpr().normalizeNode(normWA);
selectionPred().normalizeNode(normWA);
requiredOrder().normalizeNode(normWA);
getGroupAttr()->normalizeInputsAndOutputs(normWA);
}
// ***********************************************************************
// $$$$ CommonSubExprRef
// member functions for class CommonSubExprRef
// ***********************************************************************
// CommonSubExprRef::transformNode()
// Passes this node's inputs to the child, transforms the child and then
// handles this node's predicates. Allocating a separate VEG region for
// the child is currently disabled (see the commented-out calls below).
//
// Parameters:
//   normWARef              IN : reference to the normalizer work area
//   locationOfPointerToMe  IN : reference to the location that holds the
//                               pointer to this node
void CommonSubExprRef::transformNode(NormWA & normWARef,
ExprGroupId & locationOfPointerToMe)
{
CMPASSERT( locationOfPointerToMe.getPtr() == this );
// Transform this node only once.
if (nodeIsTransformed())
return;
markAsTransformed();
// set lexicalRefNumFromParent_ for expanded refs, now that
// we can be sure the lexical ref has been bound
if (isAnExpansionOf_)
lexicalRefNumFromParent_ = isAnExpansionOf_->lexicalRefNumFromParent_;
// Allocate a new VEG region for the child, to prevent VEGies that
// cross the potentially common part and the rest of the query tree.
// (Currently disabled.)
//normWARef.allocateAndSetVEGRegion(EXPORT_ONLY, this);
// Make my inputs available to the child before transforming it.
child(0)->getGroupAttr()->addCharacteristicInputs(
getGroupAttr()->getCharacteristicInputs());
child(0)->transformNode(normWARef, child(0));
// Note: CommonSubExprRef::pullUpPreds() deliberately does nothing.
pullUpPreds();
transformSelectPred(normWARef, locationOfPointerToMe);
//normWARef.restoreOriginalVEGRegion();
}
// CommonSubExprRef::pullUpPreds()
// Intentionally empty: no predicates are pulled up from the child of a
// common subexpression reference.
void CommonSubExprRef::pullUpPreds()
{
// To preserve the commonality of common subexpressions, we
// don't allow predicates to be pulled out of them.
// So do nothing here, preventing predicate pull-up.
// Alternatively, we could do the pull-up and record the
// pulled-up predicates here:
// RelExpr::pullUpPreds();
// pulledPredicates_ += selectionPred();
}
void CommonSubExprRef::pushdownCoveredExpr(
const ValueIdSet & outputExpr,
const ValueIdSet & newExternalInputs,
ValueIdSet & predicatesOnParent,
const ValueIdSet * setOfValuesReqdByParent,
Lng32 childIndex)
{
// Remember the predicates we pushed down, since other consumers of
// this CSE may not have pushed the equivalent
// predicates. Therefore, if we want to materialize a common
// subexpressions, any predicates that were pushed down and are not
// common to all the consumers must be pulled back out before we can
// share a common query tree.
ValueIdSet predsPushedThisTime(predicatesOnParent);
if (pushedPredicates_.isEmpty())
// this is also the time to record the original set of inputs
// for this node, before predicate pushdown can alter the inputs
commonInputs_ = getGroupAttr()->getCharacteristicInputs();
RelExpr::pushdownCoveredExpr(outputExpr,
newExternalInputs,
predicatesOnParent,
setOfValuesReqdByParent,
childIndex);
predsPushedThisTime -= predicatesOnParent;
pushedPredicates_ += predsPushedThisTime;
}
// CommonSubExprRef::rewriteNode()
// Does the generic RelExpr rewrite, then normalizes the members that are
// specific to this class and counts this node in the work area.
void CommonSubExprRef::rewriteNode(NormWA & normWARef)
{
RelExpr::rewriteNode(normWARef);
// Keep a copy of the column list as it is before normalization ...
nonVEGColumns_ = columnList_;
// ... then normalize the column list and the common inputs.
columnList_.normalizeNode(normWARef);
commonInputs_.normalizeNode(normWARef);
// Tell the work area that one more CommonSubExprRef has been rewritten.
normWARef.incrementCommonSubExprRefCount();
}
// CommonSubExprRef::semanticQueryOptimizeNode()
// Runs the CSE analysis for this consumer and, depending on its outcome,
// returns the node that should take this node's place:
//   EXPAND      - the child tree (the CSE is expanded in place)
//   CREATE_TEMP - the temp table and the insert into it are created
//                 first, then handled like TEMP
//   TEMP        - a scan of the temp table, from createTempScan()
//   ERROR       - this node, unchanged; diagnostics must already be set
// Returns NULL, after emitting a CSE diagnostic, if creating the temp
// table or the insert into it fails; createTempScan()'s result is
// returned as is and goes through the same NULL check.
RelExpr * CommonSubExprRef::semanticQueryOptimizeNode(NormWA & normWARef)
{
  RelExpr *result = this;
  CSEInfo *info = CmpCommon::statement()->getCSEInfo(internalName_);

  // The CSE info is dereferenced below; fail with a compiler assertion
  // rather than dereferencing a null pointer if it is missing.
  CMPASSERT(info != NULL);

  // do the analysis top-down
  analyzeAndPrepareForSharing(*info);

  RelExpr::semanticQueryOptimizeNode(normWARef);

  switch (info->getAnalysisOutcome(id_))
    {
    case CSEInfo::EXPAND:
      // Not able to share the CSE, expand the CSE by eliminating
      // this node and putting its child tree in its place. In this
      // case, analyzeAndPrepareForSharing() left the tree unchanged.
      result = child(0).getPtr();
      break;

    case CSEInfo::CREATE_TEMP:
      determineTempTableType(*info);
      if (createTempTable(*info))
        {
          RelExpr *ins = createInsertIntoTemp(*info, normWARef);

          if (ins)
            info->setInsertIntoTemp(ins);
          else
            result = NULL;
        }
      else
        result = NULL;

      // Creating the temp table or the insert failed: report it below.
      if (!result)
        break;
      // fall through to the next case

    case CSEInfo::TEMP:
      // We are able to share this CSE between multiple consumers.
      // Replace this node with a scan on the temp table that
      // holds the CSE results.
      result = createTempScan(*info, normWARef);
      break;

    case CSEInfo::ERROR:
      // diags should be set
      CMPASSERT(CmpCommon::diags()->mainSQLCODE() < 0);
      break;

    default:
      CMPASSERT(0);
    }

  if (result == NULL)
    emitCSEDiagnostics("Error in creating temp table or temp table insert",
                       TRUE);

  return result;
}
// CommonSubExprRef::prepareMeForCSESharing()
// Checks that no selection predicates are left on this node. None of
// the parameters are used here; the caller has already made the
// adjustments they describe.
// Returns TRUE if the node has no selection predicates, otherwise emits
// a CSE diagnostic and returns FALSE.
NABoolean CommonSubExprRef::prepareMeForCSESharing(
     const ValueIdSet &outputsToAdd,
     const ValueIdSet &predicatesToRemove,
     const ValueIdSet &commonPredicatesToAdd,
     const ValueIdSet &inputsToRemove,
     ValueIdSet &valuesForVEGRewrite,
     ValueIdSet &keyColumns,
     CSEInfo *info)
{
  // the caller of this method already took care of the adjustments to
  // make, just make sure that all predicates could be pushed down to
  // the child
  if (getSelectionPred().isEmpty())
    return TRUE;

  // this should not happen
  emitCSEDiagnostics("Unable to push common predicates into child tree");
  return FALSE;
}
// Decide what to do with the common subexpression described by "info"
// and, if it is going to be shared, prepare a copy of the child tree
// for materialization.
//
// The first consumer that gets here becomes the "analyzing consumer"
// and does all the work; any later consumer just returns the outcome
// recorded in "info".
//
// The method works in two phases:
//  - Analysis: negotiate, across all consumers, the set of columns the
//    temp table must provide and the predicates that can stay pushed
//    down (common predicates) vs. those that must be applied by each
//    consumer individually (non-common predicates).
//  - Preparation: copy the child tree and ask it to undo the relevant
//    effects of normalization (prepareTreeForCSESharing()).
//
// Returns CSEInfo::CREATE_TEMP when the copy of the child tree was
// prepared successfully and installed as child(0), CSEInfo::EXPAND
// otherwise. The result is also stored in "info".
CSEInfo::CSEAnalysisOutcome CommonSubExprRef::analyzeAndPrepareForSharing(CSEInfo &info)
{
  // do a few simple shortcuts first

  // Make sure this consumer is in the main list of consumers. Note
  // that the analysis is done top-down and that currently the only
  // two places where we make copies of the tree are in
  // RelRoot::semanticQueryOptimizeNode() and in this method. The copy
  // made in the root is only used when we bypass SQO completely.
  // Although we may sometimes look at unused copies during the CSE
  // analysis phase, this guarantees (for now) that the analyzing
  // consumer always is and stays in the list of consumers. If we ever
  // make additional copies of the tree we may need to reconsider this
  // logic.
  if (info.getConsumer(id_) != this)
    {
      info.replaceConsumerWithAnAlternative(this);
      DCMPASSERT(info.getConsumer(id_) == this);
    }

  // If another consumer has already done the analysis, return its result.
  // Note: Right now, all the consumers do the same, in the future, we could
  // expand some and share others.
  if (info.getIdOfAnalyzingConsumer() >= 0)
    return info.getAnalysisOutcome(id_);

  // mark me as the analyzing consumer
  info.setIdOfAnalyzingConsumer(id_);

  if (CmpCommon::getDefault(CSE_USE_TEMP) == DF_OFF)
    {
      emitCSEDiagnostics("Forced with CQD CSE_USE_TEMP CQD 'off'");
      info.setAnalysisOutcome(CSEInfo::EXPAND);
      return CSEInfo::EXPAND;
    }

  CSEInfo::CSEAnalysisOutcome result = CSEInfo::UNKNOWN_ANALYSIS;
  NABoolean canShare = TRUE;
  NABitVector neededColumnsBitmap;
  ValueIdList tempTableColumns;
  const ValueIdSet &charOutputs(getGroupAttr()->getCharacteristicOutputs());
  CollIndex numConsumers = info.getNumConsumers();
  RelExpr *copyOfChildTree = NULL;

  // A laundry list of changes to undo the effects of normalization,
  // specifically of pushing predicates down and of minimizing the
  // outputs. Also, a list of new common selection predicates to add.
  ValueIdSet outputsToAdd;
  ValueIdSet predicatesToRemove(pushedPredicates_);
  ValueIdSet newPredicatesToAdd;
  ValueIdSet commonPredicates(pushedPredicates_);
  ValueIdSet inputsToRemove(child(0).getGroupAttr()->getCharacteristicInputs());
  ValueIdSet *nonCommonPredicatesArray =
    new(CmpCommon::statementHeap()) ValueIdSet[numConsumers];
  ValueIdMap *myColsToConsumerMaps =
    new(CmpCommon::statementHeap()) ValueIdMap[numConsumers];
  ItemExpr *nonCommonPredicatesORed = NULL;
  int numORedPreds = 0;
  NABoolean singleLexicalRefWithTempedAncestors =
    (info.getNumLexicalRefs() == 1);
  Int32 numPreliminaryRefs = 0;
  ValueIdSet childTreeKeyColumns;

  // ------------------------------------------------------------------
  // CSE Analysis phase
  // ------------------------------------------------------------------

  // loop over the consumers of the CSE to negotiate a common set
  // of columns to retrieve and a common set of predicates that can
  // remain pushed down
  for (CollIndex c=0; c<numConsumers && canShare; c++)
    {
      CommonSubExprRef *consumer = info.getConsumer(c);
      const ValueIdList &cCols(consumer->columnList_);
      ValueIdSet availableValues(cCols);
      ValueIdSet requiredValues(
           consumer->getGroupAttr()->getCharacteristicOutputs());
      const ValueIdSet &cPreds(consumer->pushedPredicates_);
      ValueIdSet mappedPreds;
      ValueId dummy;
      CSEInfo *infoToCheck = &info;
      CommonSubExprRef *childToCheck = consumer;
      NABoolean ancestorIsTemped = FALSE;

      // look for a chain of only lexical ancestors of which one is
      // materialized in a temp table
      while (!ancestorIsTemped &&
             infoToCheck->getNumLexicalRefs() == 1 &&
             childToCheck &&
             childToCheck->parentRefId_ >= 0)
        {
          // look at the ancestor and what it is planning to do
          infoToCheck = CmpCommon::statement()->getCSEInfoById(
               childToCheck->parentCSEId_);
          CMPASSERT(infoToCheck);
          CommonSubExprRef *parent =
            infoToCheck->getConsumer(childToCheck->parentRefId_);
          CSEInfo::CSEAnalysisOutcome parentOutcome =
            infoToCheck->getAnalysisOutcome(parent->getId());

          if (parentOutcome == CSEInfo::CREATE_TEMP ||
              parentOutcome == CSEInfo::TEMP)
            ancestorIsTemped = TRUE;

          childToCheck = parent;
        }

      if (!ancestorIsTemped)
        singleLexicalRefWithTempedAncestors = FALSE;

      requiredValues += cPreds;
      availableValues +=
        consumer->getGroupAttr()->getCharacteristicInputs();

      // Do a sanity check whether we can produce the required
      // values (outputs and predicates) from the available values
      // (tables of the original subexpression, to be a temp table).
      // If not, one reason could be that we copied an expression
      // and now have different ValueIds. This could be improved.
      if (requiredValues.removeUnCoveredExprs(availableValues))
        {
          emitCSEDiagnostics(
               "Characteristic outputs not covered by common subexpression");
          canShare = FALSE;
        }

      // Check the required values of this consumer and add all of
      // them (by position number of the original list) to the bit
      // vector of required columns. Note that we might be able to
      // optimize this somewhat for expressions.
      for (CollIndex i=0; i<cCols.entries(); i++)
        if (requiredValues.referencesTheGivenValue(cCols[i],
                                                   dummy,
                                                   TRUE,
                                                   TRUE))
          neededColumnsBitmap += i;

      if (cPreds.isEmpty())
        {
          // This consumer did not push any predicates into its child,
          // so it needs all the rows of the common subexpression. No
          // predicate can therefore be common to all consumers, and
          // none may remain pushed down in the shared child tree.
          // (Before this explicit branch existed, such a consumer was
          // silently skipped and the predicates of the analyzing
          // consumer could survive as "common" predicates.)
          commonPredicates.clear();
        }
      else if (consumer->id_ == id_)
        {
          // Assert for now that we are still seeing the same node,
          // not a copy. If this fails, think about whether making
          // a copy might cause issues here, e.g. because some of
          // the information has diverged.
          DCMPASSERT(consumer == this);

          // consumer is the same as "this"
          mappedPreds = cPreds;
        }
      else
        {
          // another consumer, likely to use different ValueIds

          // a ValueIdMap that maps my columns (top) to those of the
          // other consumer (bottom)
          ValueIdSet vegRefsWithDifferingConsts;
          ValueIdSet vegRefsWithDifferingInputs;

          myColsToConsumerMaps[c] = ValueIdMap(columnList_, cCols);

          // make sure we can also map VEGPreds for any VEGRefs in the map
          myColsToConsumerMaps[c].augmentForVEG(
               TRUE,  // add VEGPreds for existing VEGRefs
               FALSE, // no need to add more VEGRefs
               TRUE,  // only do this if constants match
               // only do this if the VEGies refer to
               // the same outputs
               &(getGroupAttr()->getCharacteristicInputs()),
               &(consumer->getGroupAttr()->getCharacteristicInputs()),
               &vegRefsWithDifferingConsts,
               &vegRefsWithDifferingInputs);

          // for now, don't work on trees that have VEGies with differing
          // constants or inputs
          if (vegRefsWithDifferingConsts.entries() > 0)
            {
              info.addVEGRefsWithDifferingConstants(vegRefsWithDifferingConsts);
              emitCSEDiagnostics(
                   "Encountered VEGs with different constants in different consumers");
              canShare = FALSE;
            }

          if (vegRefsWithDifferingInputs.entries() > 0)
            {
              info.addVEGRefsWithDifferingInputs(vegRefsWithDifferingInputs);
              emitCSEDiagnostics("Encountered VEGs with different characteristic inputs");
              canShare = FALSE;
            }

          // Check the inputs, all of the consumers must have the same inputs
          // (parameters). We could see differences if query caching decides
          // to parameterize the copies of the CTEs differently.
          if (consumer->commonInputs_ != commonInputs_)
            {
              emitCSEDiagnostics(
                   "Differing inputs in CTE references, try CQD QUERY_CACHE '0'");
              canShare = FALSE;
            }

          // rewrite the predicates on the consumer in terms of my
          // own ValueIds
          myColsToConsumerMaps[c].rewriteValueIdSetUp(mappedPreds, cPreds);

          commonPredicates.findCommonSubexpressions(mappedPreds, FALSE);
        }

      // Save the mapped preds for later.
      // Note: These are not final yet, until we have found
      // common predicates among all the consumers.
      nonCommonPredicatesArray[c] = mappedPreds;
    }

  if (singleLexicalRefWithTempedAncestors)
    {
      // if all the parent refs are materialized and each one is a
      // copy of a single lexical ref, then that means that we will
      // evaluate this CSE only once, therefore no need to materialize
      // it
      emitCSEDiagnostics(
           "expression is only evaluated once because parent is materialized");
      canShare = FALSE;
    }

  // translate the bit vector of required columns into a set of values
  // that are required (by other consumers) but are not produced by my
  // child tree
  makeValueIdListFromBitVector(tempTableColumns,
                               columnList_,
                               neededColumnsBitmap);
  outputsToAdd.insertList(tempTableColumns);
  info.setNeededColumns(neededColumnsBitmap);

  predicatesToRemove -= commonPredicates;
  info.setCommonPredicates(commonPredicates);

  if (canShare && info.getNeededColumns().entries() == 0)
    {
      // Temp table has no columns, looks like all we care about is
      // the number of rows returned. This is not yet supported. We
      // could make a table with a dummy column.
      emitCSEDiagnostics("Temp table with no columns is not yet supported");
      canShare = FALSE;
    }

  // Make an ORed predicate of all those non-common predicates of the
  // consumers, to be applied on the common subexpression when creating
  // the temp table. Also determine non-common predicates to be applied
  // when scanning the temp table.
  for (CollIndex n=0; n<numConsumers && canShare; n++)
    {
      // Now that we have the definitive set of common predicates,
      // we can get the "uncommon" predicates, i.e. those that
      // have to be evaluated on the individual scans of the temp
      // tables. What we can do, however, is to OR these "uncommon"
      // predicates and apply that OR predicate when building the
      // temp table.

      // repeat step from above, but this time remove the common
      // preds from the array of non-common ones
      commonPredicates.findCommonSubexpressions(nonCommonPredicatesArray[n],
                                                TRUE);

      if (nonCommonPredicatesArray[n].entries() > 0)
        {
          // numORedPreds == n holds only as long as every consumer
          // seen so far contributed a non-common predicate; once one
          // consumer had none, the OR is no longer extended
          if (numORedPreds == n)
            {
              // build the ORed predicate
              ItemExpr *uncommonPreds =
                nonCommonPredicatesArray[n].rebuildExprTree();

              if (nonCommonPredicatesORed)
                nonCommonPredicatesORed =
                  new(CmpCommon::statementHeap()) BiLogic(
                       ITM_OR,
                       nonCommonPredicatesORed,
                       uncommonPreds);
              else
                nonCommonPredicatesORed = uncommonPreds;

              numORedPreds++;
            }

          // rewrite the non-common predicates in terms of the consumer
          // (the ValueIdMap should in many cases already have the
          // correct translation)
          myColsToConsumerMaps[n].rewriteValueIdSetDown(
               nonCommonPredicatesArray[n],
               info.getConsumer(n)->nonSharedPredicates_);
        }
    }

  // adding the ORed non-common predicates makes sense only if all
  // consumers have some such predicate. If at least one consumer
  // doesn't, that's equivalent to a TRUE predicate, and TRUE OR x is
  // always TRUE.
  if (numORedPreds == numConsumers)
    {
      nonCommonPredicatesORed->synthTypeAndValueId();

      newPredicatesToAdd += nonCommonPredicatesORed->getValueId();
      info.addCommonPredicates(newPredicatesToAdd);
    }

  // ------------------------------------------------------------------
  // Preparation phase
  // ------------------------------------------------------------------

  if (canShare)
    {
      // make a copy of the child tree, so we can revert back to the
      // original tree if things don't work out
      copyOfChildTree = child(0)->copyRelExprTree(CmpCommon::statementHeap());

      outputsToAdd -= child(0).getGroupAttr()->getCharacteristicOutputs();
      inputsToRemove -= commonInputs_;

      canShare = copyOfChildTree->prepareTreeForCSESharing(
           outputsToAdd,
           predicatesToRemove,
           newPredicatesToAdd,
           inputsToRemove,
           nonVEGColumns_,
           childTreeKeyColumns,
           &info);

      if (!canShare)
        emitCSEDiagnostics("Failed to prepare child tree for materialization");
      else if (!copyOfChildTree->getGroupAttr()->getCharacteristicOutputs().contains(
                    outputsToAdd))
        {
          // we failed to produce the requested additional outputs
          emitCSEDiagnostics("Failed to produce all the required output columns");
          canShare = FALSE;
        }
      else
        {
          // remember est. log. props of the child, those will be transplanted
          // into the temp scan later
          cseEstLogProps_ =
            copyOfChildTree->getGroupAttr()->outputLogProp(
                 (*GLOBAL_EMPTY_INPUT_LOGPROP));

          // Get a preliminary bearing on how many times we are going
          // to evaluate this CSE if it isn't shared. Note that this
          // looks at the parent CSE's analysis outcome, and not all
          // of these parents may be analyzed yet, so this may be an
          // overestimate.
          numPreliminaryRefs = info.getTotalNumRefs();

          for (CollIndex k=0; k<tempTableColumns.entries(); k++)
            if (childTreeKeyColumns.contains(tempTableColumns[k]))
              info.addCSEKeyColumn(k);
        }
    }

  if (canShare &&
      CmpCommon::getDefault(CSE_USE_TEMP) != DF_ON)
    {
      // When CSE_USE_TEMP is set to SYSTEM, make a heuristic decision

      // calculate some metrics for the temp table, based on row length,
      // cardinality (or max. cardinality) and number of times it is used
      // NOTE(review): the divisions below assume that
      // info.getTotalNumRefs() returned a value > 0 - TODO confirm
      Lng32 tempTableRowLength = tempTableColumns.getRowLength();
      CostScalar cseTempTableSize = cseEstLogProps_->getResultCardinality() *
        tempTableRowLength / numPreliminaryRefs;
      CostScalar cseTempTableMaxSize = cseEstLogProps_->getMaxCardEst() *
        tempTableRowLength / numPreliminaryRefs;
      double maxTableSize =
        ActiveSchemaDB()->getDefaults().getAsDouble(CSE_TEMP_TABLE_MAX_SIZE);
      double maxTableSizeBasedOnMaxCard =
        ActiveSchemaDB()->getDefaults().getAsDouble(CSE_TEMP_TABLE_MAX_MAX_SIZE);

      // cumulative number of key columns referenced in consumers
      Int32 totalKeyColPreds = 0;

      // key cols that are referenced by a predicate in all consumers
      ValueIdSet commonKeyCols(childTreeKeyColumns);

      // check the total size of the temp table, divided by the number
      // of times it is used
      if (maxTableSize > 0 && cseTempTableSize > maxTableSize)
        {
          char buf[200];

          snprintf(buf, sizeof(buf),
                   "Temp table size %e exceeds limit %e",
                   cseTempTableSize.getValue(),
                   maxTableSize);
          emitCSEDiagnostics(buf);
          canShare = FALSE;
        }
      else if (maxTableSizeBasedOnMaxCard > 0 &&
               cseTempTableMaxSize > maxTableSizeBasedOnMaxCard)
        {
          char buf[200];

          snprintf(buf, sizeof(buf),
                   "Temp table size %e (based on max card) exceeds limit %e",
                   cseTempTableMaxSize.getValue(),
                   maxTableSizeBasedOnMaxCard);
          emitCSEDiagnostics(buf);
          canShare = FALSE;
        }

      // determine which "key" columns are referenced by non-common
      // predicates
      for (CollIndex ncp=0; ncp<numConsumers; ncp++)
        {
          const ValueIdSet &nonCommonPreds(nonCommonPredicatesArray[ncp]);
          ValueIdSet tempRefCols;

          tempRefCols.accumulateReferencedValues(childTreeKeyColumns,
                                                 nonCommonPreds);
          totalKeyColPreds += tempRefCols.entries();
          nonCommonPreds.weedOutUnreferenced(commonKeyCols);
        }

      // decide against materialization if the average number of "key"
      // columns referenced in each consumer is greater than
      // CSE_PCT_KEY_COL_PRED_CONTROL percent
      if (totalKeyColPreds >
          (numConsumers * childTreeKeyColumns.entries() *
           ActiveSchemaDB()->getDefaults().getAsDouble(CSE_PCT_KEY_COL_PRED_CONTROL) / 100.0))
        {
          char buf[200];

          snprintf(buf, sizeof(buf),
                   "Number of potential key predicates in consumers (%d) exceeds limit %f",
                   totalKeyColPreds,
                   (numConsumers * childTreeKeyColumns.entries() *
                    ActiveSchemaDB()->getDefaults().getAsDouble(CSE_PCT_KEY_COL_PRED_CONTROL) / 100.0));
          emitCSEDiagnostics(buf);
          canShare = FALSE;
        }

      // decide against materialization if the number of key columns
      // referenced by every consumer is > CSE_COMMON_KEY_PRED_CONTROL
      if (commonKeyCols.entries() >
          ActiveSchemaDB()->getDefaults().getAsLong(CSE_COMMON_KEY_PRED_CONTROL))
        {
          char buf[200];

          snprintf(buf, sizeof(buf),
                   "All consumers have a predicate on %d common key columns, limit is %d",
                   commonKeyCols.entries(),
                   ActiveSchemaDB()->getDefaults().getAsLong(CSE_COMMON_KEY_PRED_CONTROL));
          emitCSEDiagnostics(buf);
          canShare = FALSE;
        }
    }

  if (canShare)
    {
      // commit to materialization: from now on the prepared copy is
      // the child of this node
      result = CSEInfo::CREATE_TEMP;
      child(0) = copyOfChildTree;
    }
  else if (result == CSEInfo::UNKNOWN_ANALYSIS)
    result = CSEInfo::EXPAND;

  info.setAnalysisOutcome(result);

  return result;
}
// Record in "info" which kind of temp table to use for this CSE:
// a Hive table when CQD CSE_HIVE_TEMP_TABLE is ON, a volatile table
// otherwise.
void CommonSubExprRef::determineTempTableType(CSEInfo &info)
{
  const CSEInfo::CSETempTableType chosenType =
    (CmpCommon::getDefault(CSE_HIVE_TEMP_TABLE) == DF_ON)
      ? CSEInfo::HIVE_TEMP_TABLE
      : CSEInfo::VOLATILE_TEMP_TABLE;

  info.setTempTableType(chosenType);
}
// Build a name and the DDL text for the temp table that will hold the
// result of this common subexpression, create the table, and look up
// its NATable. Name, DDL and NATable are stored in "info".
//
// Returns FALSE if a column type is not supported or if the table
// could not be created, TRUE otherwise.
// NOTE(review): a failure to read the NATable in step 4 only emits a
// diagnostic; the method still returns TRUE in that case and leaves
// the NATable in "info" unset - confirm callers rely on that.
NABoolean CommonSubExprRef::createTempTable(CSEInfo &info)
{
  int ok = TRUE;
  const int maxCSENameLen = 12;
  NAString tempTableName(COM_CSE_TABLE_PREFIX);
  NAString tempTableSchema;
  NAString tempTableCatalog;
  CSEInfo::CSETempTableType tempTableType = info.getTempTableType();
  char nameSuffix[32];
  NAString tempTableDDL;
  ValueIdList cols;
  NAString cseNamePrefix(internalName_.data(),
                         MINOF(internalName_.length(),16));
  const NABoolean isHiveTable =
    (tempTableType == CSEInfo::HIVE_TEMP_TABLE);
  const NABoolean isVolatileTable =
    (tempTableType == CSEInfo::VOLATILE_TEMP_TABLE);

  // Note: Errors at this stage of the process may be recoverable, so
  // we emit only warning diagnostics and just return FALSE if the
  // temp table cannot be created

  // Step 1: Create temp table name
  // ------------------------------

  // The name is assembled as
  //   <COM_CSE_TABLE_PREFIX>ppp...[_iii...]_Ssss_ccc
  // where
  //   ppp... is a prefix of the CTE name or an internal name
  //          (just to make it easier to identify, not really needed,
  //          we only use letters, digits, underscores)
  //   iii... is the SQL session id
  //          (Hive tables only, to keep different sessions apart)
  //   sss    is the statement number in this session
  //   ccc    is the CSE number in this statement

  // Overall name length is 256, and both HDFS directory and file name
  // can be quite long, so don't allow long user names as well. Note
  // that the user name is just here to improve readability by humans,
  // it's not needed for uniqueness.
  if (cseNamePrefix.length() > maxCSENameLen)
    cseNamePrefix.remove(maxCSENameLen);

  cseNamePrefix.toUpper();

  // anything that is not a digit, an upper case letter or an
  // underscore is replaced with an underscore
  for (int p=0; p<cseNamePrefix.length(); p++)
    {
      char c = cseNamePrefix[p];
      const bool isDigit = (c >= '0' && c <= '9');
      const bool isUpperCaseLetter = (c >= 'A' && c <= 'Z');

      if (!(isDigit || isUpperCaseLetter || c == '_'))
        cseNamePrefix.replace(p,1,"_");
    }

  tempTableName += cseNamePrefix;

  if (isHiveTable)
    {
      tempTableName += "_";
      tempTableName +=
        CmpCommon::context()->sqlSession()->getSessionId();
    }

  snprintf(nameSuffix, sizeof(nameSuffix), "_S%u_%d",
           CmpCommon::context()->getStatementNum(),
           info.getCSEId());
  tempTableName += nameSuffix;

  if (isHiveTable)
    {
      tempTableSchema  = HIVE_SYSTEM_SCHEMA;
      tempTableCatalog = HIVE_SYSTEM_CATALOG;
    }

  info.setTempTableName(QualifiedName(tempTableName,
                                      tempTableSchema,
                                      tempTableCatalog));

  // Step 2: Create the DDL for the temp table
  // -----------------------------------------

  tempTableDDL += "CREATE ";
  if (isVolatileTable)
    tempTableDDL += "VOLATILE ";
  tempTableDDL += "TABLE ";

  if ((isHiveTable && tempTableSchema == HIVE_SYSTEM_SCHEMA) ||
      isVolatileTable)
    {
      // Hive table in default schema or volatile table,
      // just use a one-part name
      tempTableDDL += tempTableName;
    }
  else if (isHiveTable)
    {
      // Hive table in a different schema, use a 2 part name
      // (not yet supported)
      tempTableDDL += tempTableSchema;
      tempTableDDL += '.';
      tempTableDDL += tempTableName;
    }
  else
    {
      // use a regular 3-part name
      // (not yet supported)
      tempTableDDL +=
        info.getTempTableName().
        getQualifiedNameAsAnsiString();
    }

  tempTableDDL += "(\n";

  // one column per needed column of the CSE, named by its position
  makeValueIdListFromBitVector(cols, columnList_, info.getNeededColumns());
  for (CollIndex c=0; c<cols.entries(); c++)
    {
      char colName[10];
      NAString colType;

      snprintf(colName, sizeof(colName)," C%05d ", c);
      tempTableDDL += colName;

      if (isHiveTable)
        cols[c].getType().getMyTypeAsHiveText(&colType);
      else
        cols[c].getType().getMyTypeAsText(&colType);

      if (colType == "unknown")
        {
          char msg[100];

          // report the type by the text getMyTypeAsText() produces
          colType = "";
          cols[c].getType().getMyTypeAsText(&colType);
          snprintf(msg, sizeof(msg),
                   "Unsupported data type for Hive temp table: %s",
                   colType.data());
          emitCSEDiagnostics(msg, FALSE);
          ok = FALSE;
        }

      tempTableDDL += colType;

      const NABoolean moreColumnsFollow = (c+1 < cols.entries());

      if (moreColumnsFollow)
        tempTableDDL += ",\n";
      else
        tempTableDDL += ")";
    }

  if (ok)
    info.setTempTableDDL(tempTableDDL);

  // Step 3: Create the temp table
  // -----------------------------

  if (ok)
    {
      if (isHiveTable)
        {
          int m = CmpCommon::diags()->mark();

          if (HiveClient_JNI::executeHiveSQL(tempTableDDL) != HVC_OK)
            {
              if (CmpCommon::statement()->recompiling() ||
                  CmpCommon::statement()->getNumOfCompilationRetries() > 0)
                {
                  // ignore temp table creation errors if we are
                  // recompiling, the temp table may have been
                  // created in a previous compilation attempt
                  // (if not, we will run into other errors later)
                  CmpCommon::diags()->rewind(m);
                }
              else
                {
                  ok = FALSE;
                  // we will fall back to a previous tree and try to
                  // recover, make sure there are no errors from our
                  // failed attempt in the diags area
                  CmpCommon::diags()->negateAllErrors();
                  emitCSEDiagnostics(
                       "Error in creating Hive temp table");
                }
            }
        }
      else
        {
          // Todo: CSE: create volatile table
          emitCSEDiagnostics("Volatile temp tables not yet supported");
          ok = FALSE;
        }
    }

  // Step 4: Get the NATable for the temp table
  // ------------------------------------------

  if (ok)
    {
      BindWA bindWA(ActiveSchemaDB(), CmpCommon::context());
      CorrName cn(info.getTempTableName());
      NATable *tempNATable =
        ActiveSchemaDB()->getNATableDB()->get(cn,
                                              &bindWA,
                                              NULL);

      if (tempNATable)
        info.setTempNATable(tempNATable);
      else
        emitCSEDiagnostics("Unable to read metadata for temporary table");
    }

  return ok;
}
// Build the RelExpr tree that populates the temp table of this CSE
// and register it in "info" via setInsertIntoTemp().
//
// Returns NULL without touching "info" if no NATable was recorded for
// the temp table. Only Hive temp tables are handled; any other type
// raises a forced CSE diagnostic. normWARef is not used here.
RelExpr * CommonSubExprRef::createInsertIntoTemp(CSEInfo &info, NormWA & normWARef)
{
  RelExpr *insertTree = NULL;
  BindWA bindWA(ActiveSchemaDB(), CmpCommon::context());
  CorrName cn(info.getTempTableName());

  // an earlier failure left us without an NATable, give up
  if (!info.getTempNATable())
    return NULL;

  TableDesc *tableDesc =
    bindWA.createTableDesc(info.getTempNATable(),
                           cn,
                           FALSE);
  ValueIdList srcValueList;

  if (info.getTempTableType() != CSEInfo::HIVE_TEMP_TABLE)
    {
      emitCSEDiagnostics(
           "Unsupported temp table type in createInsertIntoTemp()",
           TRUE);
    }
  else
    {
      // Create this tree:
      //
      //   BlockedUnion
      //     +-- Truncate temp
      //     +-- FastExtract temp
      //           +-- cse
      //
      // In this tree "cse" is the child of this node and "temp" is
      // the name of the Hive table. The tree is equivalent to what
      // would be generated by an SQL statement
      // "insert overwrite table <temp> <cse>".
      insertTree = FastExtract::makeFastExtractTree(
           tableDesc,
           child(0).getPtr(),
           TRUE,   // overwrite the table
           FALSE,  // called outside the binder
           TRUE,   // this is a table for a common subexpression
           &bindWA);

      CMPASSERT(insertTree->getOperatorType() == REL_UNION &&
                insertTree->child(1)->getOperatorType() == REL_FAST_EXTRACT);

      RelExpr *fastExtract = insertTree->child(1);

      // the fast extract selects the needed columns of the CSE
      makeValueIdListFromBitVector(srcValueList, columnList_, info.getNeededColumns());
      CMPASSERT(fastExtract->getOperatorType() == REL_FAST_EXTRACT);
      static_cast<FastExtract *>(fastExtract)->setSelectList(srcValueList);

      // give the new nodes fresh group attributes and pass the
      // characteristic inputs of the cse up to the top of the tree
      fastExtract->setGroupAttr(
           new (CmpCommon::statementHeap()) GroupAttributes());
      fastExtract->getGroupAttr()->addCharacteristicInputs(
           fastExtract->child(0).getGroupAttr()->getCharacteristicInputs());

      insertTree->child(0)->setGroupAttr(
           new (CmpCommon::statementHeap()) GroupAttributes());

      insertTree->setGroupAttr(
           new (CmpCommon::statementHeap()) GroupAttributes());
      insertTree->getGroupAttr()->addCharacteristicInputs(
           fastExtract->getGroupAttr()->getCharacteristicInputs());
    }

  info.setInsertIntoTemp(insertTree);

  return insertTree;
}
// Create the tree that reads the temp table of this CSE: a Scan of
// the temp table below a MapValueIds node that maps the needed columns
// of this node to the columns of the temp table. The MapValueIds node
// takes over the group attributes of this node.
//
// Returns NULL if "info" has no insert-into-temp tree recorded.
RelExpr * CommonSubExprRef::createTempScan(CSEInfo &info, NormWA & normWARef) //
{
  // check for earlier errors
  if (!info.getInsertIntoTemp())
    return NULL;

  BindWA bindWA(ActiveSchemaDB(), CmpCommon::context());
  CorrName cn(info.getTempTableName(),
              CmpCommon::statementHeap(),
              internalName_);
  TableDesc *tableDesc =
    bindWA.createTableDesc(info.getTempNATable(),
                           cn,
                           FALSE,
                           getHint());
  Scan *tempScan =
    new(CmpCommon::statementHeap()) Scan(cn, tableDesc);

  // Run the new scan through bind and normalization phases, like the
  // rest of the nodes have
  ExprGroupId scanLocation(tempScan);

  tempScan->bindSelf(&bindWA);
  normWARef.allocateAndSetVEGRegion(IMPORT_ONLY, tempScan);
  tempScan->transformNode(normWARef, scanLocation);
  // the transformation must not have replaced the scan
  CMPASSERT(scanLocation.getPtr() == tempScan);
  tempScan->rewriteNode(normWARef);
  tempScan->normalizeNode(normWARef);
  tempScan->synthLogProp(&normWARef);
  normWARef.restoreOriginalVEGRegion();

  tempScan->setCommonSubExpr(this);

  // At this point we have a scan node on the temp table, with a new
  // TableDesc that has new ValueIds. Make a map from the new ids to
  // my own.
  ValueIdList myOutputs;
  ValueIdList tempTableOutputList;
  ValueIdList tempTableVEGOutputList;
  ValueIdSet tempTableOutputs;
  ValueIdSet tempTablePreds;

  makeValueIdListFromBitVector(myOutputs, columnList_, info.getNeededColumns());
  tableDesc->getUserColumnList(tempTableOutputList);
  tableDesc->getEquivVEGCols(tempTableOutputList, tempTableVEGOutputList);
  CMPASSERT(myOutputs.entries() == tempTableVEGOutputList.entries());

  ValueIdMap outToTempMap(myOutputs, tempTableVEGOutputList);

  MapValueIds *mapNode =
    new(CmpCommon::statementHeap()) MapValueIds(tempScan,
                                                outToTempMap,
                                                CmpCommon::statementHeap());

  mapNode->setCSERef(this);
  mapNode->addValuesForVEGRewrite(nonVEGColumns_);

  // express my outputs in terms of the temp table columns
  outToTempMap.rewriteValueIdSetDown(getGroupAttr()->getCharacteristicOutputs(),
                                     tempTableOutputs);
  // Todo: CSE: the rewrite below doesn't work with VEGPreds, and the
  // augment method also isn't sufficient
  outToTempMap.rewriteValueIdSetDown(nonSharedPredicates_, tempTablePreds);

  // the scan gets my inputs, the mapped outputs and the mapped
  // non-shared predicates
  GroupAttributes *scanGA = tempScan->getGroupAttr();

  scanGA->setCharacteristicInputs(
       getGroupAttr()->getCharacteristicInputs());
  scanGA->setCharacteristicOutputs(tempTableOutputs);
  tempScan->setSelectionPredicates(tempTablePreds);

  mapNode->setGroupAttr(getGroupAttr());

  return mapNode;
}
// Report why a common subexpression is not shared.
//
// Normally this does nothing. With CQD CSE_DEBUG_WARNINGS ON it adds
// a diagnostic with SQLCODE 5001 carrying the internal name of this
// CSE and "message". With forceError set to TRUE the diagnostic is
// always added and an AssertException is thrown in addition. That
// should be avoided as best as possible, since expanding the CSEs
// should have given us a successful plan.
void CommonSubExprRef::emitCSEDiagnostics(const char *message, NABoolean forceError)
{
  // nothing to report unless debugging warnings are on or the caller
  // insists on an error
  if (CmpCommon::getDefault(CSE_DEBUG_WARNINGS) != DF_ON && !forceError)
    return;

  *CmpCommon::diags() << DgSqlCode(5001)
                      << DgString0(internalName_.data())
                      << DgString1(message);

  if (forceError)
    {
      // throw an exception that forces the normalizer to skip the
      // SQO phase and to revert to the original tree
      AssertException(message, __FILE__, __LINE__).throwException();
    }
}
// -----------------------------------------------------------------------
// IsolatedNonTableUDR::transformNode()
// -----------------------------------------------------------------------
void IsolatedNonTableUDR::transformNode(NormWA & normWARef,
                                        ExprGroupId & locationOfPointerToMe)
{
  CMPASSERT( this == locationOfPointerToMe );

  // each node is transformed only once
  if (!nodeIsTransformed())
    {
      // If we are a CallSP, the binder put the subquery or UDF in a
      // Tuple node as child(0). That child has to be transformed before
      // the rest of this node, so that Tuple::transformNode() can remove
      // the ValueId of the subquery or UDF from its tupleExpr. Otherwise
      // we end up with an illegal transformation.
      //
      // This would not be needed if CallSP worked like the other nodes.
      // Consider fixing so the binder doesn't create the tuple, but
      // allow the normal transformation like we do here do its magic.
      //
      // The other thing that is different for CallSP is that if it has
      // a subquery or UDF in its inputs, it is not a leaf node until
      // after we do the final transformation in TransRule.
      //
      // There we transform something like this:
      //
      //   before:  CallSP( Join(Values, T1) )
      //   after:   Join( Join(Values, T1), CallSP )
      //
      if (child(0) != NULL)
        {
          // The RelRoutine::transformNode() call below will transform
          // the new child.
          child(0)->transformNode (normWARef, child(0));
        }

      // Let the RelRoutine::transformNode() do the work.
      RelRoutine::transformNode (normWARef, locationOfPointerToMe);

      // The neededValueIds are left over from the old CallSp class
      // hierarchy. It is believed that the inputParamsVids() should
      // suffice. Will optimize this later.
      getNeededValueIds() = getProcInputParamsVids();

      // -----------------------------------------------------------------
      // Prime the Group Attributes
      // -----------------------------------------------------------------
      primeGroupAttributes();

      markAsTransformed();
    }
} // IsolatedNonTableUDR::transformNode()
// -----------------------------------------------------------------------
// IsolatedNonTableUDR::rewriteNode()
// -----------------------------------------------------------------------
void IsolatedNonTableUDR::rewriteNode(NormWA &normWARef)
{
  // The needed ValueIds (our parameter inputs) are kept by this class,
  // so they are normalized here ...
  getNeededValueIds().normalizeNode(normWARef);

  // ... and everything else is left to the base class.
  RelRoutine::rewriteNode(normWARef);
}
//**********************************
// Constructor for class CqsWA
//***********************************
// Allocates the (initially empty) lookup list and map from the
// statement heap and resets the flags and counters.
CqsWA::CqsWA()
  : tableCANodeList_(
         new (CmpCommon::statementHeap())
           TableCANodeIdPairLookupList(CmpCommon::statementHeap())),
    cqsCANodeIdMap_(
         new (CmpCommon::statementHeap())
           CQSRelExprCANodeIdMap(30, CmpCommon::statementHeap())),
    reArrangementSuccessful_(FALSE),
    numberOfScanNodesinNQT_(0),
    numberOfScanNodesinCQS_(0)
{
}
//************************************************************************
// This method collects CANodeIds from each scan node of the Normalized
// tree
//************************************************************************
// Walks the tree below nqtExpr and, for every leaf that is a REL_SCAN,
// adds a (CANodeId, TableDesc) pair to the table/CANodeId list.
// Leaves of any other type are ignored.
void CqsWA::gatherCANodeIDTableNamepairsForNormalizedTree( RelExpr *nqtExpr)
{
  // inner node: just recurse into all the children
  if (nqtExpr->getArity() != 0)
    {
      for (Int32 childNum = 0; childNum < nqtExpr->getArity(); childNum++)
        gatherCANodeIDTableNamepairsForNormalizedTree(nqtExpr->child(childNum));
      return;
    }

  // leaf: only scans are of interest
  if (nqtExpr->getOperatorType() != REL_SCAN)
    return;

  Scan *scanNode = static_cast<Scan *>(nqtExpr);
  TableCANodeIdPair *newPair =
    new (CmpCommon::statementHeap()) TableCANodeIdPair();

  newPair->Id_ = scanNode->getGroupAttr()->getGroupAnalysis()->
    getNodeAnalysis()->getId();
  newPair->tabId_ = scanNode->getTableDesc();

  getTableCANodeList()->insert(newPair);
} // gatherCANodeIDTableNamepairsForNormalizedTree()
//**************************************************************************
// This method delegates responsibility of gathering CANodeIdSets (TableSets)
// for the CQS tree to class CQSRelExprNodeMap
//***************************************************************************
void CqsWA::gatherNodeIdSetsForCQSTree(RelExpr *cqsExpr)
{
  // The map records the table sets for each node of the CQS tree while
  // it traverses it. The set returned for the root is not used here.
  CQSRelExprCANodeIdMap *nodeIdMap = getcqsCANodeIdMap();
  CANodeIdSet rootNodeIds = nodeIdMap->gatherNodeIdSetsForCQSTree(cqsExpr, this);
}
//*************************************************************************
// This method collects CANodeId values for all tables in the CQS tree.
//*************************************************************************
// Recursively visits the CQS tree below cqsExpr. For every forced scan
// leaf (REL_FORCE_ANY_SCAN) and for every inner node an entry is added
// to this map; for inner nodes the entry holds the CANodeId sets of
// the left and the right subtree.
//
// Returns the set of CANodeIds found in the tree below cqsExpr, which
// is empty for leaves other than forced scans.
CANodeIdSet CQSRelExprCANodeIdMap::gatherNodeIdSetsForCQSTree(RelExpr* cqsExpr,
                                                              CqsWA *cwa)
{
  const Int32 arity = cqsExpr->getArity();

  if (arity > 0)
    {
      // inner node, collect the sets of the children
      CQSRelExprCANodeIdPair *innerEntry =
        new (CmpCommon::statementHeap()) CQSRelExprCANodeIdPair();

      if (cwa->isIndexJoin(cqsExpr))
        {
          // for an index join only the right child is looked at
          innerEntry->leftChildSet_ = CANodeIdSet();
          innerEntry->rightChildSet_ =
            gatherNodeIdSetsForCQSTree(cqsExpr->child(1), cwa);
        }
      else
        {
          innerEntry->leftChildSet_ =
            gatherNodeIdSetsForCQSTree(cqsExpr->child(0), cwa);

          if (arity == 1)
            innerEntry->rightChildSet_ = CANodeIdSet();
          else
            // arity is 2
            innerEntry->rightChildSet_ =
              gatherNodeIdSetsForCQSTree(cqsExpr->child(1), cwa);
        }

      innerEntry->forcedNode_ = cqsExpr;
      insertThisElement(cqsExpr, innerEntry);

      return innerEntry->leftChildSet_ +
             innerEntry->rightChildSet_;
    }

  if ((arity == 0) && (cqsExpr->getOperatorType() == REL_FORCE_ANY_SCAN))
    {
      // forced scan, look up the CANodeId of the table or index
      CQSRelExprCANodeIdPair *scanEntry =
        new (CmpCommon::statementHeap()) CQSRelExprCANodeIdPair();

      cwa->incrementNumberOfScanNodesinCQS();

      ScanForceWildCard *forcedScan = static_cast<ScanForceWildCard *>(cqsExpr);
      CANodeId scanNodeId = scanEntry->populateReturnCANodeId(forcedScan, cwa);

      insertThisElement(forcedScan, scanEntry);

      CANodeIdSet scanNodeSet(scanNodeId);
      return scanNodeSet;
    }

  // leaves other than scan such as Tuple...
  // how do we treat derived tables? (values(1) as t(a))
  return CANodeIdSet();
} // gatherNodeIdSetsForCQSTree()
//************************************************************************
// Given a Table Name or Index Name, this finds the corresponding CANodeId
// For MP tables: the table name needs to be like \node.$vol.subvol.tablename
// Otherwise, we assume that it is an MX table....
//************************************************************************
// Searches the table/CANodeId list for the first entry whose table
// (by qualified name or correlation name) or one of whose indexes
// matches tableName, marks that entry as visited and returns its
// CANodeId. Throws an AssertException if the matching entry was
// already visited or if no entry matches.
CANodeId CqsWA::findCANodeId(const NAString &tableName)
{
  TableCANodeIdPairLookupList *candidates = getTableCANodeList();
  TableCANodeIdPair *candidate;

  // if tableName is of form cat.sch.t ok
  // otherwise get it to that form by appending current catalog and
  // schema name as required.
  // how about MP tables???????????? TBD.....
  // for MX tables only or ANSI notation...
  NAString nameToMatch(CmpCommon::statementHeap());

  for (CollIndex i=0; i < candidates->entries(); i++)
    {
      candidate = candidates->at(i);

      // If a correlation name is set, do not append the default catalog
      // and schema name. MP table names are also taken as they are.
      if (candidate->tabId_->getCorrNameObj().getCorrNameAsString() != "" ||
          isMPTable(tableName))
        nameToMatch = tableName;
      else
        nameToMatch = makeItThreePartAnsiString(tableName);

      // first try the table itself ...
      NABoolean matched =
        (candidate->tabId_->getCorrNameObj().getQualifiedNameAsString()
         == nameToMatch) ||
        (candidate->tabId_->getCorrNameObj().getCorrNameAsString()
         == nameToMatch);

      // ... then check indexes
      if (!matched)
        {
          const LIST(IndexDesc *) &indexList = candidate->tabId_->getIndexes();

          for (CollIndex j=0; j < indexList.entries(); j++)
            {
              IndexDesc *indexDesc = indexList.at(j);

              if (nameToMatch == indexDesc->getNAFileSet()->getExtFileSetName())
                {
                  matched = TRUE;
                  break;
                }
            }
        }

      if (matched)
        {
          // each entry may be handed out only once
          if (candidate->visited_)
            AssertException("", __FILE__, __LINE__).throwException();

          candidate->visited_ = TRUE;
          return candidate->Id_;
        }
    } // for

  // if you are here, invoke error handling
  AssertException("", __FILE__, __LINE__).throwException();
  return CANodeId(); // keep VisualC++ happy
} // CqsWA::findCANodeId()
//*************************************************************
// Prepares the work area for matching a CQS tree against the
// normalized query tree:
//   1. gathers the (table, CANodeId) pairs of the normalized tree and
//      records how many there are in numberOfScanNodesinNQT_;
//   2. gathers the CANodeId sets (left and right child table sets)
//      for the nodes of the CQS tree.
// Throws an AssertException if the normalized tree has more scan nodes
// than numberOfScanNodesinCQS_ (that counter is not assigned here;
// presumably it is maintained by gatherNodeIdSetsForCQSTree()).
//**************************************************************
void CqsWA::initialize(RelExpr *nqtExpr, RelExpr *cqsExpr)
{
  // pass 1: normalized query tree
  gatherCANodeIDTableNamepairsForNormalizedTree(nqtExpr);
  numberOfScanNodesinNQT_ = getTableCANodeList()->entries();

  // pass 2: CQS tree
  gatherNodeIdSetsForCQSTree(cqsExpr);

  // the normalized tree must not have more scans than the CQS tree
  if (!(numberOfScanNodesinNQT_ <= numberOfScanNodesinCQS_))
    AssertException("", __FILE__, __LINE__).throwException();
}
//*****************************************************************
// Fills in this pair for a forced scan node and returns the CANodeId
// found for it.
//
// The id is looked up through cwa->findCANodeId() using the scan's
// exposed name or, when that is empty, its index name. If both names
// are empty no lookup is made and a default-constructed CANodeId is
// returned. The scan is remembered in forcedNode_ and both child sets
// are reset to empty sets.
//*****************************************************************
CANodeId CQSRelExprCANodeIdPair::populateReturnCANodeId(RelExpr *scan,
                                                        CqsWA *cwa)
{
  ScanForceWildCard *wildCardScan = static_cast<ScanForceWildCard *>(scan);

  NAString exposedName(wildCardScan->getExposedName(),
                       CmpCommon::statementHeap());
  NAString forcedIndex(wildCardScan->getIndexName(),
                       CmpCommon::statementHeap());

  // the exposed (table) name wins over the index name
  const NAString *lookupName = NULL;
  if (exposedName != "")
    lookupName = &exposedName;
  else if (forcedIndex != "")
    lookupName = &forcedIndex;
  // else: neither name is given; no error is raised here

  CANodeId nodeId;
  if (lookupName != NULL)
    nodeId = cwa->findCANodeId(*lookupName);

  // a scan is a leaf: it has no child table sets
  forcedNode_ = wildCardScan;
  leftChildSet_ = CANodeIdSet();
  rightChildSet_ = CANodeIdSet();

  return nodeId;
} // CQSRelExprCANodeIdPair::populateReturnCANodeId()
//*********************************************************************
// Constructor for the map that associates a CQS relational expression
// pointer with the CANodeId sets of its left and right subtrees.
//
// init_size : initial size handed to the underlying hash dictionary
// outHeap   : heap handed to the underlying hash dictionary
//
// The dictionary is created with HashFn() as its hash function and
// with uniqueness turned on.
//*********************************************************************
CQSRelExprCANodeIdMap::CQSRelExprCANodeIdMap(ULng32 init_size,
                                             CollHeap *outHeap)
  : HASHDICTIONARY(ULng32, CQSRelExprCANodeIdPair)
      (&CQSRelExprCANodeIdMap::HashFn,
       init_size,
       TRUE,     // uniqueness
       outHeap)
{
}
//*******************************************************************
// Hash function handed to the hash dictionary by the constructor.
// It is the identity function: the key value itself is the hash value.
//*******************************************************************
ULng32 CQSRelExprCANodeIdMap::HashFn(const ULng32 &key)
{
return key;
}
//**************************************************************
// Given a RelExpr pointer of the CQS tree, returns the entry that was
// stored for it with insertThisElement(), i.e. the pair holding the
// left and right table subsets of that node. The result is whatever
// the dictionary lookup (getFirstValue) returns for the key.
//
// The key is the pointer value itself, stored in a Long and handed to
// the dictionary as a ULng32 * (the same encoding insertThisElement()
// uses).
//
// The probe key lives on the stack. The previous implementation
// allocated a new Long from the statement heap on every lookup and
// never released it.
// NOTE(review): this relies on getFirstValue() only reading the key
// during the call and not keeping the pointer -- confirm against the
// hash dictionary implementation.
//**************************************************************
CQSRelExprCANodeIdPair * CQSRelExprCANodeIdMap::get(RelExpr *key )
{
  Long probeKey = (Long) (key);

  return HASHDICTIONARY(ULng32, CQSRelExprCANodeIdPair)::
           getFirstValue((ULng32 *) &probeKey);
}
//****************************************************************
// Adds an entry for a CQS expression to the map.
//
// expr      : the CQS relational expression; its pointer value is
//             the key
// cqsNodeId : the pair (forced node, left/right table sets) to store
//
// The key is written into a Long allocated from the statement heap
// and handed to the dictionary as a ULng32 * (presumably the
// dictionary keeps that pointer, hence the heap allocation).
//****************************************************************
void CQSRelExprCANodeIdMap::insertThisElement(RelExpr * expr,
                                              CQSRelExprCANodeIdPair *cqsNodeId)
{
  Long *keyCell = new (CmpCommon::statementHeap()) Long;
  *keyCell = (Long) (expr);

  insert((ULng32 *) keyCell, cqsNodeId);
}
//*********************************************************************
// "this" is a group by node of the CQS tree.
//
// If the normalized expression relExpr is a group by as well, the pair
// is handed to CqsWA::checkAndProcessGroupByJBBC(), which processes it
// when the group by is a JBBC and leaves it alone otherwise. A CQS
// group by may correspond to several group by expressions at the end
// of optimization, which is why a non-matching one is not an error:
// in that case the default RelExpr implementation is used, i.e. the
// CQS group by is skipped and matching continues with its child.
//*********************************************************************
RelExpr *GroupByAgg::generateMatchingExpr(CANodeIdSet &lChildSet,
                                          CANodeIdSet &rChildSet,
                                          RelExpr *relExpr)
{
  // normalized node is not a group by: skip this CQS node
  if (!relExpr->getOperator().match(REL_ANY_GROUP))
    return RelExpr::generateMatchingExpr(lChildSet, rChildSet, relExpr);

  return CURRSTMT_CQSWA->checkAndProcessGroupByJBBC(relExpr,
                                                    lChildSet,
                                                    rChildSet,
                                                    this);
} // GroupByAgg::generateMatchingExpr()
//************************************************************
// Default implementation for unary CQS nodes: the node itself is
// ignored and the request is forwarded to its only child, using
// the table sets recorded for that child. The lChildSet and
// rChildSet arguments are not used here.
//
// Throws an AssertException if this node's arity is not one.
//************************************************************
RelExpr *RelExpr::generateMatchingExpr(CANodeIdSet &lChildSet,
                                       CANodeIdSet &rChildSet,
                                       RelExpr *relExpr)
{
  // only unary operators can be skipped this way
  if (getArity() != 1)
    AssertException("", __FILE__, __LINE__).throwException();

  RelExpr *onlyChild = child(0);

  CANodeIdSet childLeftSet, childRightSet;
  CURRSTMT_CQSWA->getTableSets(onlyChild, childLeftSet, childRightSet);

  return onlyChild->generateMatchingExpr(childLeftSet,
                                         childRightSet,
                                         relExpr);
} // RelExpr::generateMatchingExpr()
//*****************************************************************
// Recursively walks the join backbone of the CQS tree ("this" is a
// forced join) and builds the matching logical expression out of the
// normalized expression relExpr.
//
// relExpr is a join or a multi-join:
//   a join over lChildSet / rChildSet is requested through
//   generateLogicalExpr(); its two children are then replaced by the
//   results of matching this node's children, predicates are pushed
//   down and the join is returned.
// relExpr is a scan, lChildSet is empty and rChildSet has one entry:
//   relExpr is returned unchanged (index join?).
// relExpr is a group by:
//   handled by CqsWA::checkAndProcessGroupByJBBC().
// anything else:
//   an AssertException is thrown.
//*****************************************************************
RelExpr *JoinForceWildCard::generateMatchingExpr(CANodeIdSet &lChildSet,
                                                 CANodeIdSet &rChildSet,
                                                 RelExpr *relExpr)
{
  const NABoolean isJoinBackbone =
    relExpr->getOperator().match(REL_ANY_JOIN) ||
    relExpr->getOperatorType() == REL_MULTI_JOIN;

  if (!isJoinBackbone)
    {
      // index join?
      if (relExpr->getOperator().match(REL_SCAN) &&
          lChildSet.isEmpty() &&
          rChildSet.entries() == 1)
        return relExpr;

      if (relExpr->getOperator().match(REL_ANY_GROUP))
        return CURRSTMT_CQSWA->checkAndProcessGroupByJBBC(relExpr,
                                                          lChildSet,
                                                          rChildSet,
                                                          this);

      // nothing we know how to match against a forced join
      AssertException("", __FILE__, __LINE__).throwException();
      return NULL;
    }

  Join *newJoin = (Join *) relExpr->generateLogicalExpr(lChildSet, rChildSet);
  if (newJoin == NULL)
    return NULL;

  // remember both children before either of them is replaced
  RelExpr *oldLeft  = newJoin->child(0);
  RelExpr *oldRight = newJoin->child(1);

  RelExpr *wcLeft  = child(0);
  RelExpr *wcRight = child(1);

  // left subtree
  CANodeIdSet leftOfLeft, rightOfLeft;
  CURRSTMT_CQSWA->getTableSets(wcLeft, leftOfLeft, rightOfLeft);
  newJoin->child(0) = wcLeft->generateMatchingExpr(leftOfLeft,
                                                   rightOfLeft,
                                                   oldLeft);

  // right subtree
  CANodeIdSet leftOfRight, rightOfRight;
  CURRSTMT_CQSWA->getTableSets(wcRight, leftOfRight, rightOfRight);
  newJoin->child(1) = wcRight->generateMatchingExpr(leftOfRight,
                                                    rightOfRight,
                                                    oldRight);

  newJoin->pushdownCoveredExpr
    (newJoin->getGroupAttr()->getCharacteristicOutputs(),
     newJoin->getGroupAttr()->getCharacteristicInputs(),
     newJoin->selectionPred());

  return newJoin;
} //JoinForceWildCard::generateMatchingExpr()
//*******************************************************************
// Handles a group by found in the normalized tree while matching.
//
// relExpr   : normalized expression; returned unchanged unless it is
//             a group by that has a GBAnalysis (treated as a JBBC)
// lChildSet,
// rChildSet : table sets to match below the group by; overwritten
//             with the sets of cqsExpr's child when cqsExpr is itself
//             a group by
// cqsExpr   : the CQS node being matched against relExpr
//
// For a JBBC group by, its child is replaced by the expression
// generated from the CQS subtree, then group attributes are primed,
// predicates are pushed down and logical properties are synthesized.
//
// Throws an AssertException if the group by has a GBAnalysis but its
// node id is not among the JBBCs of the query analysis.
//*******************************************************************
RelExpr * CqsWA::checkAndProcessGroupByJBBC( RelExpr *relExpr,
                                             CANodeIdSet &lChildSet,
                                             CANodeIdSet &rChildSet,
                                             RelExpr *cqsExpr)
{
  if (!relExpr->getOperator().match(REL_ANY_GROUP))
    return relExpr;

  NodeAnalysis *gbNodeAnalysis =
    relExpr->getGroupAttr()->getGroupAnalysis()->getNodeAnalysis();
  GBAnalysis *gbAnalysis = ((GroupByAgg *) relExpr)->getGBAnalysis();

  // not a JBBC: nothing to re-arrange
  // (the case "right child of a join is GB<-Scan" is not handled)
  if (gbAnalysis == NULL)
    return relExpr;

  // you may not need this check
  CANodeId gbId = gbNodeAnalysis->getId();
  if (! QueryAnalysis::Instance()->getJBBCs().containsThisId(gbId))
    AssertException("", __FILE__, __LINE__).throwException();

  // Re-arrange the join tree below the group by from the table sets.
  RelExpr *gbChild = relExpr->child(0);

  // If the CQS node is a group by too, match with its child and that
  // child's table sets; otherwise match with the CQS node itself.
  RelExpr *wildCard = cqsExpr;
  if (cqsExpr->getOperator().match(REL_ANY_GROUP))
    {
      wildCard = cqsExpr->child(0);
      getTableSets(wildCard, lChildSet, rChildSet);
    }

  relExpr->child(0) =
    wildCard->generateMatchingExpr(lChildSet, rChildSet, gbChild);

  relExpr->primeGroupAttributes();
  relExpr->pushdownCoveredExpr
    (relExpr->getGroupAttr()->getCharacteristicOutputs(),
     relExpr->getGroupAttr()->getCharacteristicInputs(),
     relExpr->selectionPred());
  relExpr->synthLogProp();

  return relExpr;
}// CqsWA::checkAndProcessGroupByJBBC()
//*************************************************************************
// Given an expression from the CQS tree, copies the table sets recorded
// for its two children into leftSet and rightSet.
//
// cqsExpr  : CQS expression used as the key into the CQS node id map
// leftSet  : receives the left child table set
// rightSet : receives the right child table set
//
// Throws an AssertException if the map lookup yields no entry for
// cqsExpr. Without this check a missing entry would be dereferenced;
// with it, the failure takes the same exception path as the other
// CQS matching errors in this file.
//**************************************************************************
void CqsWA::getTableSets(RelExpr * cqsExpr,
                         CANodeIdSet & leftSet,
                         CANodeIdSet &rightSet)
{
  CQSRelExprCANodeIdPair *NodeIdSets = getcqsCANodeIdMap()->get(cqsExpr);

  // no table sets were recorded for this CQS node
  if (NodeIdSets == NULL)
    AssertException("", __FILE__, __LINE__).throwException();

  leftSet = NodeIdSets->leftChildSet_;
  rightSet = NodeIdSets->rightChildSet_;
} // CqsWA::getTableSets()
//************************************************************************
// The exchange wild card is essentially ignored: control is simply
// passed to its child, together with the table sets recorded for
// that child.
//
// Throws an AssertException if rChildSet is not empty.
//************************************************************************
RelExpr *ExchangeForceWildCard::generateMatchingExpr(CANodeIdSet &lChildSet,
                                                     CANodeIdSet &rChildSet,
                                                     RelExpr *relExpr)
{
  // an exchange has a single child, so there must be no right table set
  if (! rChildSet.isEmpty())
    AssertException("", __FILE__, __LINE__).throwException();

  RelExpr *onlyChild = child(0);

  CANodeIdSet childLeftSet, childRightSet;
  CURRSTMT_CQSWA->getTableSets(onlyChild, childLeftSet, childRightSet);

  return onlyChild->generateMatchingExpr(childLeftSet,
                                         childRightSet,
                                         relExpr);
} // ExchangeForceWildCard::generateMatchingExpr()
//***********************************************************************
// A forced scan matches a scan: relExpr is returned as it is, after
// error checking. relExpr must be a scan and both lChildSet and
// rChildSet must be empty; otherwise an AssertException is thrown.
//***********************************************************************
RelExpr *ScanForceWildCard::generateMatchingExpr(CANodeIdSet &lChildSet,
                                                 CANodeIdSet &rChildSet,
                                                 RelExpr *relExpr)
{
  const NABoolean matchesPlainScan =
    relExpr->getOperator().match(REL_SCAN) &&
    lChildSet.isEmpty() &&
    rChildSet.isEmpty();

  if (!matchesPlainScan)
    {
      AssertException("", __FILE__, __LINE__).throwException();
      return NULL; // keep VisualC++ happy
    }

  return relExpr;
} // ScanForceWildCard::generateMatchingExpr()
//**************************************************************
// Default implementation: always throws an AssertException.
// Neither argument is used. (MultiJoin provides an implementation
// that actually builds a join.)
//**************************************************************
RelExpr * RelExpr::generateLogicalExpr(CANodeIdSet &lChildSet,
CANodeIdSet &rChildSet)
{
AssertException("", __FILE__, __LINE__).throwException();
return NULL; // keep VisualC++ happy
}
//**************************************************************
// Splits the join backbone along the requested child table sets.
//
// lChildSet : tables requested for the left child
// rChildSet : tables requested for the right child
//
// Returns the join produced by splitByTables(), after logical
// properties have been synthesized for its two children and for
// the join itself. Throws an AssertException if splitByTables()
// returns NULL, i.e. no such split is possible.
//**************************************************************
RelExpr * MultiJoin::generateLogicalExpr (CANodeIdSet &lChildSet,
                                          CANodeIdSet &rChildSet)
{
  Join *splitJoin = splitByTables(lChildSet, rChildSet);

  // the requested split is not possible
  if (splitJoin == NULL)
    AssertException("", __FILE__, __LINE__).throwException();

  // children first (left, then right), then the join on top of them
  for (Int32 side = 0; side < 2; side++)
    splitJoin->child(side)->synthLogProp();
  splitJoin->synthLogProp();

  return splitJoin;
}
//**************************************************************
// Group by implementation: always throws an AssertException.
// Neither argument is used.
//**************************************************************
RelExpr * GroupByAgg::generateLogicalExpr(CANodeIdSet &lChildSet,
CANodeIdSet &rChildSet)
{
AssertException("", __FILE__, __LINE__).throwException();
return NULL; // keep VisualC++ happy
}
//**************************************************************
// Tries to re-arrange the normalized query tree below this root so
// that it follows the shape of the CQS tree cqsExpr.
//
// Returns TRUE when the re-arrangement was done. Returns FALSE when
//   - the default FORCE_BUSHY_CQS is not DF_ON,
//   - CqsWA::shouldContinue() declines the query or the CQS tree, or
//   - any exception is thrown on the way; in that case the copy of the
//     root's child taken up front is installed as child(0) of the root
//     and the CqsWA of the current statement is cleared.
//**************************************************************
NABoolean RelRoot::forceCQS(RelExpr *cqsExpr)
{
RelExpr *nqtExpr = this;
if (CmpCommon::getDefault(FORCE_BUSHY_CQS) != DF_ON)
return FALSE;
// Make a copy of the tree below nqtExpr. In case we encounter exceptions
// and are unable to proceed, we give back the saved expression for further
// processing. This also takes care of transitively called CmpAsserts.
RelExpr *rootExpr = nqtExpr;
RelExpr *nqtCopyExpr = nqtExpr->child(0)->
copyRelExprTree(CmpCommon::statementHeap());
try
{
// do not bother with this if this query is simple: single table query etc.
// no updates, compound statements etc, describe, union
// if CQS relexpr contains CutOp do not continue
if (CqsWA::shouldContinue(nqtExpr, cqsExpr))
{
RelExpr *parentExpr = nqtExpr;
nqtExpr = nqtExpr->child(0);
RelExpr *topJoin= nqtExpr;
// Walk down the chain of first children until a join or a multi-join
// is found; parentExpr trails one level behind topJoin.
while (! topJoin->getOperator().match(REL_ANY_JOIN) &&
! (topJoin->getOperatorType() == REL_MULTI_JOIN))
{
// reaching a leaf means there is no join to re-arrange
if (topJoin->getOperator().match(REL_ANY_LEAF_OP))
{
AssertException("", __FILE__, __LINE__).throwException();
}
// we look for the top most join
parentExpr = topJoin;
topJoin = topJoin->child(0);
if (topJoin == NULL)
{
AssertException("", __FILE__, __LINE__).throwException();
}
} // while no join is found
// set up the CQS work area for this statement
CURRENTSTMT->initCqsWA();
CURRSTMT_CQSWA->initialize(nqtExpr, cqsExpr);
CANodeIdSet leftTableSet, rightTableSet;
CURRSTMT_CQSWA->getTableSets(cqsExpr,leftTableSet,rightTableSet);
// build the expression that follows the CQS shape and hook it in
// below the parent of the top most join
RelExpr *childExpr = cqsExpr->generateMatchingExpr(leftTableSet,
rightTableSet,
topJoin);
parentExpr->child(0) = childExpr;
ValueIdList orderByList = reqdOrder();
ValueIdSet valuesNeeded = parentExpr->getGroupAttr()->
getCharacteristicOutputs();
// now add orderByList, if any, to expected outputs of Root's child
// this is needed so that child can synthesize and keep the sortkey.
// see the related code in RelRoot::normalizeNode() and
// PhysicalProperty::enforceCoverageByGroupAttributes(). The latter
// resets sortKey if it is not part of child's output. This needs to be
// investigated at a later time.
valuesNeeded.insertList(orderByList);
parentExpr->pushdownCoveredExpr
(valuesNeeded,
parentExpr->getGroupAttr()->getCharacteristicInputs(),
parentExpr->selectionPred());
parentExpr->synthLogProp();
CURRSTMT_CQSWA->reArrangementSuccessful_ = TRUE;
return TRUE;
} // shouldContinue?
return FALSE;
}
catch(...)
{
// decide on what message to give...
// put the saved copy back below the root
rootExpr->child(0)=nqtCopyExpr;
// reset any thing else?
CURRENTSTMT->clearCqsWA();
}
return FALSE;
}
//**************************************************************
// Decides whether CQS re-arrangement should be attempted at all.
// Returns FALSE if the normalized expression contains an operator
// that is not supported, or if the CQS expression contains a cut
// operator; TRUE otherwise. The CQS tree is only inspected when the
// normalized tree passed its check.
//**************************************************************
NABoolean CqsWA::shouldContinue(RelExpr *nqtExpr, RelExpr *cqsExpr)
{
  return !CqsWA::containsNotSupportedOperator(nqtExpr) &&
         !CqsWA::containsCutOp(cqsExpr);
} // CqsWA::shouldContinue()
//**************************************************************
// Returns TRUE if nqtExpr or any expression below it is one of the
// operators CQS re-arrangement does not deal with: compound
// statement, any generic update, union, describe, tuple list,
// tuple or DDL. Returns FALSE otherwise.
//**************************************************************
NABoolean CqsWA::containsNotSupportedOperator(RelExpr *nqtExpr)
{
  // operators identified by their exact type
  switch (nqtExpr->getOperatorType())
    {
    case REL_COMPOUND_STMT:
    case REL_UNION:
    case REL_DESCRIBE:
    case REL_TUPLE_LIST:
    case REL_TUPLE:
    case REL_DDL:
      return TRUE;
    default:
      break;
    }

  // operators identified by a wild card match
  if (nqtExpr->getOperator().match(REL_ANY_GEN_UPDATE))
    return TRUE;

  // otherwise the answer depends on the children
  const Int32 arity = nqtExpr->getArity();
  for (Int32 c = 0; c < arity; c++)
    {
      if (containsNotSupportedOperator(nqtExpr->child(c)))
        return TRUE;
    }

  return FALSE;
} //CqsWA::containsNotSupportedOperator()
//**************************************************************
// Returns TRUE if cqsExpr or any expression below it is a cut
// operator (isCutOp()), FALSE otherwise.
//**************************************************************
NABoolean CqsWA::containsCutOp(RelExpr *cqsExpr)
{
  if (cqsExpr->isCutOp())
    return TRUE;

  // depth-first search through the children, stopping at the first hit
  const Int32 arity = cqsExpr->getArity();
  Int32 c = 0;
  while (c < arity)
    {
      if (containsCutOp(cqsExpr->child(c)))
        return TRUE;
      c++;
    }

  return FALSE;
} // CqsWA::containsCutOp()
//***************************************************************
// If tableName is not of the form cat.sch.t, make it so, using the
// catalog and schema of the current default schema:
//
//   t         ->  <default catalog>.<default schema>.t
//   sch.t     ->  <default catalog>.sch.t
//   anything with two or more dots is returned unchanged
//
// The number of name parts is derived from the positions of the
// first and the last '.' only; dots inside delimited identifiers are
// not treated specially.
//
// Fix: the two-part case used to be tested as "first dot not found
// but last dot found", which can never be true (either both searches
// find a dot or neither does), so "sch.t" was returned without a
// catalog. It is now recognized as "first dot == last dot".
//***************************************************************
NAString CqsWA::makeItThreePartAnsiString(const NAString & tableName)
{
  NAString tableNameAppend(CmpCommon::statementHeap());

  const size_t len      = tableName.length();
  const size_t firstDot = tableName.first('.');
  const size_t lastDot  = tableName.last('.');

  SchemaName s = CmpCommon::context()->schemaDB_->getDefaultSchema();

  // a position beyond the end of the string means "no such character"
  if ((firstDot > len) && (lastDot > len))
    {
      // one-part name: prepend current catalog and schema names
      tableNameAppend += s.getCatalogName();
      tableNameAppend += '.';
      tableNameAppend += s.getSchemaName();
      tableNameAppend += '.';
      tableNameAppend += tableName;
    }
  else if ((firstDot < len) && (firstDot == lastDot))
    {
      // two-part name (exactly one dot): prepend current catalog name
      tableNameAppend += s.getCatalogName();
      tableNameAppend += '.';
      tableNameAppend += tableName;
    }
  else
    {
      // already has (at least) three parts
      tableNameAppend = tableName;
    }

  return tableNameAppend;
} // CqsWA::makeItThreePartAnsiString()
//***********************************************************
// Checks whether the CQS relational expression is an index join.
// Returns TRUE only if cqsExpr does not have arity one, is one of
// the forced join wild cards (join, nested join, hash join, merge
// join) and its plan is JoinForceWildCard::FORCED_INDEXJOIN.
//***********************************************************
NABoolean CqsWA::isIndexJoin(RelExpr *cqsExpr)
{
  if (cqsExpr->getArity() == 1)
    return FALSE;

  const NABoolean isForcedJoin =
    cqsExpr->getOperator().match(REL_FORCE_JOIN) ||
    cqsExpr->getOperator().match(REL_FORCE_NESTED_JOIN) ||
    cqsExpr->getOperator().match(REL_FORCE_HASH_JOIN) ||
    cqsExpr->getOperator().match(REL_FORCE_MERGE_JOIN);

  if (!isForcedJoin)
    return FALSE;

  JoinForceWildCard *forcedJoin = (JoinForceWildCard *) cqsExpr;
  if (forcedJoin->getPlan() != JoinForceWildCard::FORCED_INDEXJOIN)
    return FALSE;

  return TRUE;
} // CqsWA::isIndexJoin()
//***************************************************************
// Is tableName an MP table name, i.e. does it look like
// $vol.subvol.tname?
//
// Returns FALSE when the name contains no '$'. When it does, the
// last '.' must exist and come after the first '$' for the answer
// to be TRUE; a name with a '$' that fails this test raises an
// AssertException. Positions that are not less than the length of
// the name are treated as "character not found".
//***************************************************************
NABoolean CqsWA::isMPTable(const NAString &tableName)
{
  const size_t totalLen  = tableName.length();
  const size_t dollarPos = tableName.first('$');

  // no volume marker: not an MP name
  if (dollarPos >= totalLen)
    return FALSE;

  const size_t lastDotPos = tableName.last('.');
  if (lastDotPos < totalLen && dollarPos < lastDotPos)
    return TRUE;

  // has a '$' but is not of the expected form
  AssertException("", __FILE__, __LINE__).throwException();
  return FALSE; // keep VisualC++ happy
} // CqsWA::isMPTable()