blob: 57cbeaced39014d2bde0cc7ef78cdb55643d6976 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: Analyzer.cpp
* Description: Query Analyzer classes and methods
*
* Created: 6/7/2002
* Language: C++
*
*
*
*
******************************************************************************
*/
//#include <process.h>
#include "Analyzer.h"
#include "MultiJoin.h"
#include "AppliedStatMan.h"
#include "opt.h"
#include "RelExpr.h"
#include "TransRule.h"
//tmp includes for join and other relexpr methods
#include "RelJoin.h"
#include "RelScan.h"
#include "RelGrby.h"
#include "RelUpdate.h"
#include "RelRoutine.h"
#include "RelExeUtil.h"
#include "QRDescGenerator.h"
#include "QueryRewriteHandler.h"
#include "Globals.h"
void forceCQS1(RelExpr *, RelExpr *);
// tmp methods
// Debugging stub: always fires an assertion. Temporary scaffolding used
// to trap unexpected code paths during development.
void xxx()
{
CMPASSERT(FALSE);
}
// Debugging stub: always fires an assertion. Marks spots whose handling
// is deferred; reaching one at run time is a bug.
void revisitLater()
{
CMPASSERT(FALSE);
}
// -----------------------------------------------------------------------
// Methods for class CANodeId
// -----------------------------------------------------------------------
// Return the input estimated logical properties of the JBB containing
// this node (located through its JBBC). When the node is not a JBBC of
// any JBB, the global empty input logical properties are returned.
EstLogPropSharedPtr CANodeId::getJBBInput()
{
  JBBC * jbbc = getNodeAnalysis()->getJBBC();
  if (jbbc)
  {
    JBB * containingJBB = jbbc->getJBB();
    if (containingJBB)
      return containingJBB->getInputEstLP();
  }

  return (*GLOBAL_EMPTY_INPUT_LOGPROP);
}
// -----------------------------------------------------------------------
// Methods for class CANodeIdSet
// -----------------------------------------------------------------------
// Construct an empty CANodeIdSet on the given heap.
CANodeIdSet::CANodeIdSet(CollHeap *outHeap) :
SUBARRAY(NodeAnalysis *)(NULL, outHeap)
{}
// Copy-construct a CANodeIdSet from another set, on the given heap.
CANodeIdSet::CANodeIdSet(const CANodeIdSet &other, CollHeap *outHeap) :
SUBARRAY(NodeAnalysis *)(other, outHeap)
{}
// Construct a singleton CANodeIdSet containing only the given id.
CANodeIdSet::CANodeIdSet(const CANodeId& id, CollHeap *outHeap) :
SUBARRAY(NodeAnalysis *)(NULL, outHeap)
{
addElement(id.id_);
}
// Return the first member of this set, or NULL_CA_ID when the set
// is empty.
CANodeId CANodeIdSet::getFirst() const
{
  CANodeId first = init();
  if (next(first))
    return first;
  return NULL_CA_ID;
}
// Set-union operator: returns a new set (allocated on the statement
// heap) containing the members of both operands. Neither operand is
// modified.
CANodeIdSet CANodeIdSet::operator +(const CANodeIdSet & other) const
{
  CANodeIdSet unionSet(*this, CmpCommon::statementHeap());

  for (CANodeId member = other.init();
       other.next(member);
       other.advance(member))
    unionSet.addElement(member);

  return unionSet;
}
// Assumption: each member of this set represents a jbbc
//
// A set is legal if:
// * the predecessors required for each jbbc are present
// in the set
// AND
// * there is at least one jbbc connected via an InnerNonSemi
// join
// Determine whether this set of JBBCs forms a legal join subset:
// every member connected by a special join must have its predecessors
// present, and at least one member must be connected via an
// InnerNonSemi join (so it can anchor the left side of the join tree).
// The verdict is cached on the corresponding JBBSubset.
NABoolean CANodeIdSet::legal() const
{
CANodeIdSet nodeSet(*this);
CANodeId i;
NABoolean hasInnerNonSemiNonTSJJoin = FALSE;
JBBSubset * jbbSubset = jbbcsToJBBSubset();
// fast path: return the cached legality verdict if one was computed
if(jbbSubset && (jbbSubset->getLegality() != NOT_KNOWN))
{
if(jbbSubset->getLegality() == LEGAL)
return TRUE;
return FALSE;
}
for (i = init(); next(i); advance(i))
{
Join * parentJoin =
i.getNodeAnalysis()->getJBBC()->getOriginalParentJoin();
// if a jbbc is connected via a InnerNonSemi join it is
// legal. It can be legally present anywhere and does not
// depend on the presence of a predecessor. Such guys are
// citizens they don't need anyone to sponsor them to make
// them legal :)
if ((!parentJoin) ||
(parentJoin->isInnerNonSemiNonTSJJoin()))
// an interesting thing to note is that sometimes jbbcs
// connected via a special join e.g. left join don't have
// any join predicate. In such a case the jbbc would be
// legal by itself if we only checked for predecessors.
// But since such a jbbc can only be the right child of a
// join we require that there is at least one jbbc connected
// via an InnerNonSemi join so that it can be the left child
// of the left most join.
hasInnerNonSemiNonTSJJoin = TRUE;
else{
// if a jbbc is connected via a special join (i.e. left,
// semi, anti semi) then it needs some predecessors to
// make it legal. He needs some predecessor to sponsor
// him to make him a legal citizen. The legal addition
// check below ensures that jbbc i is a legal citizen :)
// temporarily remove i so legalAddition() checks it against
// the remaining members, then restore it
nodeSet -= i;
if(!nodeSet.legalAddition(i))
{
if(jbbSubset)
jbbSubset->setLegality(ILLEGAL);
return FALSE;
}
nodeSet += i;
}
}
// legal only if some member can serve as the leftmost join child
if(hasInnerNonSemiNonTSJJoin)
{
if(jbbSubset)
jbbSubset->setLegality(LEGAL);
return TRUE;
}
if(jbbSubset)
jbbSubset->setLegality(ILLEGAL);
return FALSE;
}
// Check whether adding newNode to 'this' set of JBBCs yields a legal
// set, i.e. whether the join-dependency requirements of newNode are
// satisfied by the members already present.
// Returns TRUE when the addition is legal.
NABoolean CANodeIdSet::legalAddition(CANodeId newNode) const
{
JBBC* newNodeJBBC = newNode.getNodeAnalysis()->getJBBC();
Join * newNodeParentJoin = newNodeJBBC->getOriginalParentJoin();
// left most child in the original join ordering
if(!newNodeParentJoin)
return TRUE;
// right child of an inner join
else if (newNodeParentJoin->isInnerNonSemiNonTSJJoin())
return TRUE;
// right child of left, semi, anti-semi join, routine join
else{
CANodeIdSet predecessorJBBCs = newNodeJBBC->getPredecessorJBBCs();
// if the newNode is connect via a:
// * left outer join
// * semi join
// * anti semi join
// * routine join
// Then make sure the dependencies are satisfied
// return false since there are no JBBCs in 'this' set
// therefore any dependency will not be satisfied
if(!entries())
return FALSE;
if(CmpCommon::getDefault(ASYMMETRIC_JOIN_TRANSFORMATION) == DF_MINIMUM)
{
// minimal mode: all predecessors must be present, period
if(!contains(predecessorJBBCs))
return FALSE;
}
else{
if(contains(predecessorJBBCs))
return TRUE;
if(predecessorJBBCs.entries() < 2)
{
// NOTE(review): contains(predecessorJBBCs) already failed above,
// so this branch always returns FALSE here; the inner check is
// redundant but harmless.
if(!contains(predecessorJBBCs))
return FALSE;
}
else{
JBBSubset * jbbSubset = jbbcsToJBBSubset();
// predicates for which newNode is successor
ValueIdSet dependencyPreds =
jbbSubset->getPredsWithSuccessors();
ValueIdSet newNodePredsWithPredecessors =
newNodeJBBC->getPredsWithPredecessors();
newNodePredsWithPredecessors -=
newNodeJBBC->getConstPredsWithPredecessors();
dependencyPreds.intersectSet(newNodePredsWithPredecessors);
CANodeIdSet combinedSet;
combinedSet += *this;
combinedSet += newNode;
JBBSubset * combinedJBBSubset = combinedSet.jbbcsToJBBSubset();
if(newNodeParentJoin && newNodeParentJoin->isRoutineJoin())
{
ValueIdSet newNodeInputsRequiredFromSiblings =
newNodeJBBC->getInputsRequiredFromSiblings();
// check if input required from siblings is covered
for (ValueId input= newNodeInputsRequiredFromSiblings.init();
newNodeInputsRequiredFromSiblings.next(input);
newNodeInputsRequiredFromSiblings.advance(input) )
{
NABoolean inputIsCovered = combinedJBBSubset->coversExpr(input);
if(!inputIsCovered)
return FALSE;
}
}
// if all predicates with predecessors for the new node
// are not covered then this is not a legal addition
if (dependencyPreds.entries() != newNodePredsWithPredecessors.entries())
return FALSE;
// if all predicates with predecessors for the new node
// are covered, we need to do a coverage test. This is
// needed because the predicate could be a 'hyper' join
// pred i.e. a predicate that required presence of all
// successors and not just one of them e.g. t3.a = t1.b + t2.c.
// If t2 and t3 are successors then the predicate requires all
// successors. In case like:
//
// select *
// from
// t1
// join
// t2 on t1.a = t2.a
// left join
// t3 on t1.a = t3.a;
//
// In the case above sets {t1,t3} and {t2,t3} are legal
// We can have anyone of t1 or t2 paired up with t3
// Of course set {t3} would be illegal. In the hyper join
// case the {t1, t3} and {t2, t3} are not legal rather
// {t1, t2, t3} is legal of course {t1,t2} would be legal
// This is because we need all three to evaluate the predicate
// t3.a = t1.b + t2.c
for (ValueId x= newNodePredsWithPredecessors.init();
newNodePredsWithPredecessors.next(x);
newNodePredsWithPredecessors.advance(x) )
{
NABoolean predIsCovered = combinedJBBSubset->coversExpr(x);
if(!predIsCovered)
return FALSE;
}
}
}
}
return TRUE;
}
// Find the jbbc from the jbbsubset, that has minimum connections
// If there are more than one jbbcs, with the same number of
// connections, pick the first one.
// Pick from this JBBSubset the JBBC with the fewest connections to the
// other members; ties are broken by the highest equality factor
// (UEC / base row count) and then by the smallest base table.
// Returns the chosen CANodeId.
CANodeId CANodeIdSet::getJBBCwithMinConnectionsToThisJBBSubset() const
{
CollIndex noOfConn = MAX_COLL_INDEX;
CANodeId childWithMinConnections = getFirst();
CANodeIdSet jbbcsWithMinConnections(childWithMinConnections);
CostScalar maxEqFactor = csZero;
CANodeId id;
if (entries() == 1)
return childWithMinConnections;
// For two way joins make sure to return a child that can be the
// right side of a join. If one child is the right child of a
// left, semi, anti-semi join then that child should be returned.
// This is because that child cannot be the left child of a join
if (entries() == 2){
for(id = init(); next(id); advance(id))
{
JBBC* idJbbc = id.getNodeAnalysis()->getJBBC();
Join * jbbcParent = idJbbc->getOriginalParentJoin();
if (jbbcParent && (!jbbcParent->isInnerNonSemiNonTSJJoin()))
childWithMinConnections = id;
}
return childWithMinConnections;
}
// traverse thru all the nodes to check if
// this set has only one node connected via
// a inner non semi join
UInt32 numNodesConnectedViaInnerNonSemiJoin = 0;
CANodeId nodeConnectedViaInnerNonSemiJoin;
for(id = init(); next(id); advance(id))
{
JBBC* idJbbc = id.getNodeAnalysis()->getJBBC();
Join * jbbcParent = idJbbc->getOriginalParentJoin();
if((!jbbcParent) ||
(jbbcParent->isInnerNonSemiNonTSJJoin()))
{
numNodesConnectedViaInnerNonSemiJoin++;
nodeConnectedViaInnerNonSemiJoin = id;
}
}
// traverse thru all connections passed
// to determine the one with minimum connection
for (id = init();
next(id);
advance(id))
{
JBBC* idJbbc = id.getNodeAnalysis()->getJBBC();
// dont pick the only node that can be a left child of a join
if((numNodesConnectedViaInnerNonSemiJoin == 1) &&
(id == nodeConnectedViaInnerNonSemiJoin ))
continue;
CANodeIdSet jbbcsJoinedToThisSet(*this);
jbbcsJoinedToThisSet.intersectSet(idJbbc->getJoinedJBBCs());
// skip nodes on which other members of this set depend
if (idJbbc->getSuccessorJBBCs().entries() != 0)
{
CANodeIdSet dependentJbbcs(*this);
dependentJbbcs.intersectSet(idJbbc->getSuccessorJBBCs());
if (dependentJbbcs.entries() != 0)
continue;
}
CollIndex connectedJBBCsCount = jbbcsJoinedToThisSet.entries();
// get the connections from this subset
if ( connectedJBBCsCount < noOfConn)
{
noOfConn = connectedJBBCsCount;
// flush the previously stored conflicting JBBCs and start a new set
jbbcsWithMinConnections.clear();
jbbcsWithMinConnections.insert(id);
childWithMinConnections = id;
}
else
{
// collect all conflicting nodes
if ( connectedJBBCsCount == noOfConn)
{
// NOTE(review): this assignment is redundant — noOfConn already
// equals connectedJBBCsCount in this branch.
noOfConn = connectedJBBCsCount;
jbbcsWithMinConnections.insert(id);
}
}
} // for loop to pick the jbbc with min connections, and collect if there are
// more than one with min connections.
// debugging code
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << endl
<< "jbbcs with minimum connection "\
<< jbbcsWithMinConnections.getText();
CURRCONTEXT_OPTDEBUG->stream() << "Number of connections "\
<< istring(Lng32(noOfConn))\
<<endl;
}
#endif
// tie-break among the candidates with minimum connections
if (jbbcsWithMinConnections.entries() > 1)
{
CostScalar smallestTableSize = COSTSCALAR_MAX;
CostScalar currentTableSize = csOne;
// traverse thru all conflicting nodes (nodes with minimum connection)
for (id = jbbcsWithMinConnections.init();
jbbcsWithMinConnections.next(id);
jbbcsWithMinConnections.advance(id))
{
// if the node is not a table, we continue
TableAnalysis * currentTA = id.getNodeAnalysis()->getTableAnalysis();
if (!currentTA) continue;
// get all the nodes, this one is connected to
CANodeIdSet jbbcsConnectedToId(*this);
jbbcsConnectedToId.subtractElement(id);
// out of the remaining nodes, traverse thru the ones that are
// connected to current id.
CANodeIdSet connectedNodes = currentTA->getConnectedTables();
jbbcsConnectedToId.intersectSet(connectedNodes);
for (CANodeId id2 = jbbcsConnectedToId.init();
jbbcsConnectedToId.next(id2);
jbbcsConnectedToId.advance(id2) )
{
// get all columns with equality predicates between this
// and the connecting node
ValueIdSet idCols = currentTA->getColsConnectedViaVEGPreds(id2);
// get the equality factor for this set of columns.
// Equality factor is defined as UEC / base row count.
CostScalar eqFactor = currentTA->getEqualityVal(idCols);
currentTableSize = currentTA->getCardinalityOfBaseTable();
if ((eqFactor > maxEqFactor) ||
((!CURRSTMT_OPTDEFAULTS->isComplexMJQuery()) &&
(eqFactor == maxEqFactor) &&
(currentTableSize < smallestTableSize)))
{
// save the node with max equality factor
maxEqFactor = eqFactor;
childWithMinConnections = id;
smallestTableSize = currentTableSize;
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Connections tried "\
<< id.getText() \
<< " with " << id2.getText() \
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Number of columns "\
<< istring(Lng32(idCols.entries()))\
<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Equality factor "\
<< eqFactor.value() \
<< endl
<< endl;
}
#endif
} // for loop to traverse all two way connections
} // For loop to traverse over all conflicting ID, computing their equality factors
} // end if number of conflicting jbbcs > 1
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "CANodeId picked up "\
<< childWithMinConnections.getText() \
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Max Equality factor "\
<< maxEqFactor.value() \
<< endl
<< endl;
}
#endif
return childWithMinConnections;
}
// get minimum estimated row count of the group from amongst the nodeSet
CostScalar CANodeIdSet::getMinChildEstRowCount() const
{
CostScalar minEstCard = COSTSCALAR_MAX;
CostScalar minCard = csOne;
for (CANodeId nodeId = init();
next(nodeId);
advance(nodeId) )
{
minCard = (nodeId.getNodeAnalysis()->getOriginalExpr()->getGroupAttr()->getMinChildEstRowCount()).minCsOne();
if (minCard < minEstCard)
minEstCard = minCard;
}
return minEstCard;
}
// compute the JBBSubset structure for this CANodeIdSet
// Note that not every CANodeIdSet has a JBBSubset (for example
// all JBBCs has to be of the same JBB). The method verify()
// on JBBSubset can be used to verify this condition.
// Build a JBBSubset from this CANodeIdSet: members that are JBBCs
// become the subset's JBBCs, and the (at most one) member that is a
// group-by node becomes the subset's GB. Asserts if more than one
// group-by node is present.
JBBSubset * CANodeIdSet::computeJBBSubset() const
{
QueryAnalysis* qa = QueryAnalysis::Instance();
CANodeIdSet jbbcs(*this);
jbbcs.intersectSet(qa->getJBBCs());
JBBSubset* result = jbbcs.jbbcsToJBBSubset();
CANodeIdSet gbs(*this);
gbs.intersectSet(qa->getGBs());
if (gbs.entries() > 0)
{
CMPASSERT(gbs.entries() == 1);
CANodeId gb = gbs.getFirst();
result->setGB(gb);
}
return result;
}
// Return the JBB input estimated logical properties for this set,
// delegating to the first member's CANodeId::getJBBInput(). An empty
// set yields the global empty input logical properties.
// (Removed an unused local "JBB * jbb" from the original.)
EstLogPropSharedPtr CANodeIdSet::getJBBInput() const
{
  CANodeId jbbcId = getFirst();
  if (jbbcId != NULL_CA_ID)
    return jbbcId.getJBBInput();
  return (*GLOBAL_EMPTY_INPUT_LOGPROP);
}
// This method also compute a JBBSubset but much faster than
// computJBBSubset() because it assume the CANodeIdSet contains
// only sibling JBBCs. Use only if that condition is true (which
// is the case for most CANodeIdSet values generated by getJoinedJBBCs()
// and similar methods on JBBC and JBBSubset).
// Fast conversion of this set into a JBBSubset: the members are taken
// verbatim as the subset's JBBCs. Valid only when all members are
// sibling JBBCs (unlike computeJBBSubset(), no filtering is done).
JBBSubset * CANodeIdSet::jbbcsToJBBSubset() const
{
  CollHeap* stmtHeap = CmpCommon::statementHeap();
  JBBSubset* subset = new (stmtHeap) JBBSubset(stmtHeap);
  subset->setJBBCs(*this);
  return subset;
}
// Render this set as human-readable text of the form
// "CANodeIdSet: { 1  2  3 }\n".
const NAString CANodeIdSet::getText() const
{
  NAString text("CANodeIdSet: {");

  for (CANodeId member = init(); next(member); advance(member))
  {
    text += " " + istring(member) + " ";
  }

  text += "}\n";
  return text;
}
// Debugger convenience: dump this set's text via print() with its
// default arguments.
void CANodeIdSet::display() const
{
print();
}
// Print this set's text to the given file stream.
// f:      destination stream
// prefix: optional text emitted before the set (ignored if NULL)
// suffix: optional text emitted after the set (ignored if NULL)
// BUGFIX: the set's text was previously passed to fprintf as the
// format string; any '%' in it would cause undefined behavior. It is
// now printed as data, and the prefix/suffix parameters (previously
// ignored) are honored.
void CANodeIdSet::print (FILE *f,
                         const char * prefix,
                         const char * suffix) const
{
  if (prefix)
    fprintf (f, "%s", prefix);
  fprintf (f, "%s", getText().data());
  if (suffix)
    fprintf (f, "%s", suffix);
}
// -----------------------------------------------------------------------
// Methods for class CANodeIdSetMap
// -----------------------------------------------------------------------
// Construct a hash dictionary mapping CANodeIdSet keys to
// JBBSubsetAnalysis values with unique keys, and reset the lookup
// hit/miss statistics.
CANodeIdSetMap::CANodeIdSetMap(ULng32 init_size,
CollHeap *outHeap):
HASHDICTIONARY(CANodeIdSet, JBBSubsetAnalysis) (&(CANodeIdSetMap::Hasher),
init_size,
TRUE, // uniqueness constraint
outHeap )
{
hits_ = 0;
misses_ = 0;
}
// Hash function handed to the underlying HASHDICTIONARY; delegates to
// the key's own hash().
ULng32 CANodeIdSetMap::Hasher(const CANodeIdSet & key)
{
return key.hash();
}
// Look up the JBBSubsetAnalysis for the given key, updating the
// hit/miss statistics as a side effect. Returns NULL on a miss.
JBBSubsetAnalysis* CANodeIdSetMap::getFirstValue(const CANodeIdSet* key) const
{
  JBBSubsetAnalysis* result =
    HASHDICTIONARY(CANodeIdSet, JBBSubsetAnalysis)::getFirstValue(key);

  // Record hit and miss stat (may be debug code only).
  // The counters are logically-mutable bookkeeping; use const_cast
  // (instead of the original C-style casts) to make the deliberate
  // removal of constness explicit and greppable.
  CANodeIdSetMap* self = const_cast<CANodeIdSetMap*>(this);
  if (result)
    self->hits_++;
  else
    self->misses_++;

  return result;
}
// -----------------------------------------------------------------------
// Methods for class QueryAnalysis
// -----------------------------------------------------------------------
// construct a QueryAnalysis
// construct a QueryAnalysis
// outHeap:  heap on which all owned analysis structures are allocated
// analysis: when FALSE, analysis-dependent machinery (e.g. ASM) is
//           not initialized
QueryAnalysis::QueryAnalysis(CollHeap *outHeap, NABoolean analysis):
jbbSubsetMap_(107,outHeap),
analysisON_(analysis),
multiJoinsUsed_(FALSE),
optimizeForFirstNRows_(FALSE),
compressedHistsViable_(TRUE),
heap_(outHeap),
nodeAnalysisArray_(outHeap),
jbbArray_(outHeap),
allJBBCs_(outHeap),
allGBs_(outHeap),
origJBBTopExprs_(outHeap),
colAnalysisArray_(outHeap),
outputToJBBCsMap_(outHeap),
predAnalysisArray_(outHeap),
nodeMapArray_(outHeap),
mvQueryRewriteHandler_(new (outHeap) MvQueryRewriteHandler(outHeap)),
isMvCreation_(FALSE),
skippedSomeJoins_(FALSE),
dontSurfTheWave_(FALSE), // WaveFix
largestTable_(NULL),
joinedToLargestTable_(outHeap),
compilerPhase_(PRE_BINDER),
hasMandatoryXPComputed_(FALSE),
queryComplexityVector_(NULL),
highestNumOfPartns_(-1),
statsToDisplay_(NULL)
{
  // create the table connectivity graph
  tCGraph_ = new (heap_) TableConnectivityGraph(heap_);
  // create a NULL entry for CANodeId = 0
  nodeAnalysisArray_.insertAt(0,NULL);
  // set the limit at which one JBB should reach in order to invoke MJ-Rewrite
  multiJoinThreshold_ =
    ActiveSchemaDB()->getDefaults().getAsLong(MULTI_JOIN_THRESHOLD);
  // set the property of JoinOrder By User. May get reset by hint later
  joinOrderByUser_ = (CmpCommon::getDefault(JOIN_ORDER_BY_USER) == DF_ON);
  // initialize ASM only if allowed
  // BUGFIX: was bitwise '&'; '&&' expresses the intended logical AND
  // and short-circuits. Operands are 0/1 here, so the result is the
  // same, but the bitwise form was a latent lint trap.
  if ((CmpCommon::getDefault(ASM_ALLOWED) == DF_ON) && analysisON_)
    appStatMan_ = new (heap_) AppliedStatMan(heap_);
  else
    appStatMan_ = NULL;
}
// destruct a QueryAnalysis
// destruct a QueryAnalysis: release the connectivity graph, the
// applied statistics manager, and the MV rewrite handler. Other owned
// collections are heap-allocated and not individually deleted here.
QueryAnalysis::~QueryAnalysis()
{
delete tCGraph_;
delete appStatMan_;
delete mvQueryRewriteHandler_;
// should loop over other collections and delete later
}
// This is a temporary placement of initialization. Later it shall be
// moved to a common area where we can have all global initialization
// for the compiler
// Per-thread (THREAD_P) flag, off by default. Presumably toggles
// CCMP-style assertion behavior — confirm against CompCCAssert usage.
THREAD_P NABoolean CompCCAssert::useCCMPAssert_ = FALSE;
// -----------------------------------------------------------------------
// Analysis objects creation methods by QueryAnalysis
// -----------------------------------------------------------------------
// create a new NodeAnalysis for expr node and assign it new CANodeId
// Create a new NodeAnalysis for the given expression; the next free
// slot in nodeAnalysisArray_ becomes its CANodeId.
NodeAnalysis * QueryAnalysis::newNodeAnalysis(RelExpr* expr)
{
  CollIndex assignedId = nodeAnalysisArray_.entries();
  NodeAnalysis* analysis = new (heap_) NodeAnalysis(assignedId, expr, heap_);
  nodeAnalysisArray_.insertAt(assignedId, analysis);
  return analysis;
}
// create new JBB. The provided expr is the top node at the join
// backbone in the normalizer output. expr could be either a join
// or a group by on top of a join
// Create a new JBB for the top node of a join backbone from the
// normalizer output. expr is either a join or a group-by sitting on
// top of a join.
JBB * QueryAnalysis::newJBB(RelExpr* expr)
{
  CollIndex jbbId = jbbArray_.entries();
  JBB* jbb = new (heap_) JBB(jbbId, heap_);

  GroupAttributes* topGroupAttr = expr->getGroupAttr();
  jbb->setNormInputs(topGroupAttr->getCharacteristicInputs());
  jbb->setNormOutputs(topGroupAttr->getCharacteristicOutputs());
  jbbArray_.insertAt(jbbId, jbb);

  // remember the original top expression of this JBB; when expr is a
  // group-by rather than a join, the join backbone is its child
  if (expr->getOperator().match(REL_ANY_JOIN))
    origJBBTopExprs_.insert(expr);
  else
    origJBBTopExprs_.insert(expr->child(0));

  return jbb;
}
// create a new TableAnalysis structure for this tableExpr
// currently only Scan and TableValuedFunction can have
// TableAnalysis structure associated with them
// Create a new TableAnalysis for tableExpr (a Scan, IUD node, utility,
// HBase access, or table-valued function). Extracts the TableDesc for
// the operator type, wires up a NodeAnalysis, computes the used-column
// set and local predicates, initializes access paths, and registers
// the node in the table connectivity graph.
// Returns NULL when the operator has no usable TableDesc.
TableAnalysis * QueryAnalysis::newTableAnalysis(RelExpr* tableExpr)
{
// tableExpr could be of type Scan (most probably) or it could
// be of type TableValuedFunction. I could put the TableDesc
// extraction logic below in the pilotAnalysis implementation for
// these RelExprs and pass the TableDesc to the constructor.
// I decided however to do it this way in case we will need to
// compute something else that is expression based (other than
// TableDesc) inside TableAnalysis
TableDesc* tableDesc = NULL;
if (tableExpr->getOperator().match(REL_SCAN))
{
tableDesc = ((Scan*)tableExpr)->getTableDesc();
} else if ((tableExpr->getOperator().match(REL_UNARY_INSERT)) ||
(tableExpr->getOperator().match(REL_LEAF_INSERT))) {
tableDesc = ((Insert *)tableExpr)->getTableDesc();
} else if ((tableExpr->getOperator().match(REL_UNARY_UPDATE)) ||
(tableExpr->getOperator().match(REL_LEAF_UPDATE))) {
tableDesc = ((Update *)tableExpr)->getTableDesc();
} else if ((tableExpr->getOperator().match(REL_UNARY_DELETE)) ||
(tableExpr->getOperator().match(REL_LEAF_DELETE))) {
tableDesc = ((Delete *)tableExpr)->getTableDesc();
}
else if (tableExpr->getOperatorType() == REL_EXE_UTIL)
{
tableDesc = ((ExeUtilExpr*)tableExpr)->getVirtualTableDesc();
if (tableDesc == NULL)
return NULL;
}
else if ((tableExpr->getOperatorType() == REL_HBASE_ACCESS) ||
(tableExpr->getOperatorType() == REL_HBASE_COPROC_AGGR))
{
tableDesc = ((HbaseAccess*)tableExpr)->getTableDesc();
if (tableDesc == NULL)
return NULL;
}
else if (tableExpr->getOperatorType() == REL_HBASE_DELETE)
{
tableDesc = ((HbaseDelete*)tableExpr)->getTableDesc();
if (tableDesc == NULL)
return NULL;
}
else
{
tableDesc = ((TableValuedFunction*)tableExpr)->getTableDesc();
// some TableValuedFunction returns NULL tableDesc
if (tableDesc == NULL)
return NULL;
}
const IndexDesc *tableIndexDesc = tableDesc->getClusteringIndex();
// Record the highest no of partitions for a table among all of the tables
// in a query. This is used in opt.cpp to adjust max parallelism.
if (CmpCommon::getDefault(COMP_BOOL_24) == DF_ON)
{
// Get the number of partitions for the table
Lng32 tableNumOfPartns = tableIndexDesc->getNAFileSet()->getCountOfPartitions();
// Check and save highest no of partitions
if (highestNumOfPartns_ == -1)
highestNumOfPartns_ = tableNumOfPartns;
else
if (tableNumOfPartns > highestNumOfPartns_)
highestNumOfPartns_ = tableNumOfPartns;
}
// find NodeAnalysis. create new one if NULL
NodeAnalysis* nodeAnalysis = tableExpr->getGroupAnalysis()->getNodeAnalysis();
if (!nodeAnalysis)
{
nodeAnalysis = newNodeAnalysis(tableExpr);
tableExpr->getGroupAnalysis()->setNodeAnalysis(nodeAnalysis);
}
// create a new TableAnalysis and assign it to this Node
TableAnalysis* tabAnalysis =
new (heap_) TableAnalysis(nodeAnalysis, tableDesc, heap_);
nodeAnalysis->setTableAnalysis(tabAnalysis);
tableDesc->setTableAnalysis(tabAnalysis);
// find the used cols in this table
ValueIdSet usedCols;
// append columns referenced in the local predicates
ValueIdSet selectionPred = tableExpr->getSelectionPred();
selectionPred.findAllReferencedBaseCols(usedCols);
// append columns referenced in the scan required outputs
tableExpr->getGroupAttr()->getCharacteristicOutputs()
.findAllReferencedBaseCols(usedCols);
// add table's clustering key for index joins
ValueIdSet keyAsSet(tableIndexDesc->getIndexKey());
keyAsSet.findAllReferencedBaseCols(usedCols) ;
const ValueIdSet tableCols(tableDesc->getColumnList());
// for update/delete operators, all of the table's columns are used
if (((tableExpr->getOperator().match(REL_UNARY_UPDATE)) ||
(tableExpr->getOperator().match(REL_LEAF_UPDATE)) ||
(tableExpr->getOperator().match(REL_UNARY_DELETE)) ||
(tableExpr->getOperator().match(REL_LEAF_DELETE))))
{
usedCols += tableCols;
}
// Remove columns that belong to other tables. Those arrive here because they
// are equivalent to one of this table used columns (already in usedCols)
usedCols.intersectSet(tableCols); // should i make tableCols a member of TA too?
// set the usedCols_ field in TableAnalysis and append to allUsedCols_
tabAnalysis->setUsedCols(usedCols);
allUsedCols_ += usedCols;
// initialize ColAnalysis objects for each usedCols (no need to do it for
// columns that are not used)
for (ValueId x= usedCols.init(); usedCols.next(x); usedCols.advance(x) )
{
ColAnalysis* colAn = new (heap_) ColAnalysis(x, tabAnalysis);
colAnalysisArray_.insertAt(x, colAn);
}
// If column x in usedCols is the underlying column of a computed column,
// compute the column analysis for the computed column.
ValueIdSet divColumns = tableDesc->getDivisioningColumns();
for (ValueId x= divColumns.init(); divColumns.next(x); divColumns.advance(x) )
{
// Generate ColAnalysis for every DIV column, regardless of whether
// it appears in a predicate or not. A leading DIV column that does
// not appear in a predicate can still be considered for MDAM scan over
// it. See keyless heuristics for NJ in JoinToTSJRule::topMatch() in
// TransRule.cpp.
ColAnalysis* colAn = new (heap_) ColAnalysis(x, tabAnalysis);
colAnalysisArray_.insertAt(x, colAn);
}
// By the same argument as for the divisioning columns, include the salt column.
ValueIdSet saltColumnSet = tableDesc->getSaltColumnAsSet();
for (ValueId x= saltColumnSet.init(); saltColumnSet.next(x); saltColumnSet.advance(x) )
{
ColAnalysis* colAn = new (heap_) ColAnalysis(x, tabAnalysis);
colAnalysisArray_.insertAt(x, colAn);
}
// initialize local preds. Add characteristics inputs as
// used cols, to account for host variables. Host variables
// come as characteristic inputs
ValueIdSet scanCoverageExprs =
tableExpr->getGroupAttr()->getCharacteristicInputs();
scanCoverageExprs += usedCols;
ValueIdSet localPreds(tableExpr->getSelectionPred());
localPreds.removeUnCoveredExprs(scanCoverageExprs);
tabAnalysis->setLocalPreds(localPreds);
//local preds not final, will be finalized at finishAnalysis
// initialize AccessPathAnalysis objects
tabAnalysis->initAccessPaths();
// set also the allSubtreeTables in groupAnalysis
CANodeIdSet tables(nodeAnalysis->getId(), heap_);
tableExpr->getGroupAnalysis()->setSubtreeTables(tables);
// add this nodeId to the list of table nodeIds in tCGraph
tCGraph_->tables_.insert(nodeAnalysis->getId());
return tabAnalysis;
}
// create a new RoutineAnalysis structure for this routineExpr
// currently only scalar udfs can have RoutineAnalysis structure
// associated with them
// Create a new RoutineAnalysis for routineExpr (a RelRoutine),
// creating its NodeAnalysis if one does not exist yet, and cross-link
// the analysis with both the node and the RoutineDesc.
RoutineAnalysis * QueryAnalysis::newRoutineAnalysis(RelExpr* routineExpr)
{
RelRoutine * rExpr = (RelRoutine*) routineExpr;
RoutineDesc * routineDesc = rExpr->getRoutineDesc();
// find NodeAnalysis. create new one if NULL
NodeAnalysis* nodeAnalysis = routineExpr->getGroupAnalysis()->getNodeAnalysis();
if (!nodeAnalysis)
{
nodeAnalysis = newNodeAnalysis(routineExpr);
routineExpr->getGroupAnalysis()->setNodeAnalysis(nodeAnalysis);
}
// create a new RoutineAnalysis and assign it to this Node
RoutineAnalysis* routineAnalysis =
new (heap_) RoutineAnalysis(nodeAnalysis, routineDesc, heap_);
nodeAnalysis->setRoutineAnalysis(routineAnalysis);
routineDesc->setRoutineAnalysis(routineAnalysis);
return routineAnalysis;
}
// Register jbbc as a producer of the output value 'expr' in
// outputToJBBCsMap_, creating the producer set on first use.
void QueryAnalysis::addProducingJBBC(ValueId expr, CANodeId jbbc)
{
  // lazily create the set of producers for this output value
  if (NOT outputToJBBCsMap_.used(expr))
  {
    outputToJBBCsMap_.insertAt(expr, new (STMTHEAP) CANodeIdSet());
  }

  CANodeIdSet * producers = outputToJBBCsMap_[expr];
  (*producers) += jbbc;
}
// create a new JBBC for this expr (which is a join child
// of parent). Create a new NodeAnalysis if needed.
// Create a new JBBC for expr, which is a join child of 'parent'
// (parent may be NULL for the leftmost child). Creates a NodeAnalysis
// if needed, classifies the JBBC by the connecting join type, and
// registers it as a producer of each of its characteristic outputs.
JBBC * QueryAnalysis::newJBBC(Join* parent, RelExpr* expr,
                              NABoolean isFullOuterJoinOrTSJJBBC)
{
  // find NodeAnalysis. create new one if NULL
  NodeAnalysis* nodeAnalysis = expr->getGroupAnalysis()->getNodeAnalysis();
  if (!nodeAnalysis)
  {
    nodeAnalysis = newNodeAnalysis(expr);
    expr->getGroupAnalysis()->setNodeAnalysis(nodeAnalysis);
  }

  JBBC* jbbc = new (heap_) JBBC(nodeAnalysis, parent, heap_);

  if (isFullOuterJoinOrTSJJBBC)
    jbbc->setIsFullOuterJoinOrTSJJBBC();

  // extra-hub scans are guaranteed equalizers
  if ((expr->getOperator() == REL_SCAN) &&
      expr->isExtraHub())
    jbbc->isGuaranteedEqualizer_ = 1;

  // for routine joins the parent's selection preds act as filter preds
  if (parent &&
      parent->isRoutineJoin())
    jbbc->setRoutineJoinFilterPreds(parent->getSelectionPred());

  nodeAnalysis->setJBBC(jbbc);

  // add this nodeId to allJBBCs
  allJBBCs_.insert(nodeAnalysis->getId());

  // classify the JBBC by the type of join connecting it to the JBB;
  // a NULL parent means the leftmost child, treated as inner-non-semi
  if (parent)
  {
    if (parent->isInnerNonSemiNonTSJJoin())
      innerNonSemiNonTSJJBBCs_ += nodeAnalysis->getId();
    else if (parent->isRoutineJoin())
      routineJoinJBBCs_ += nodeAnalysis->getId();
    else if (parent->isLeftJoin())
      leftJoinJBBCs_ += nodeAnalysis->getId();
    else if (parent->isSemiJoin() || parent->isAntiSemiJoin())
      semiJoinJBBCs_ += nodeAnalysis->getId();
    else
      CMPASSERT(FALSE);
  }
  else
    innerNonSemiNonTSJJBBCs_ += nodeAnalysis->getId();

  // register this JBBC as a producer of each of its characteristic
  // outputs.
  // BUGFIX: the original repeated the body of addProducingJBBC()
  // inline right after calling it, inserting the same id into the
  // same set twice (idempotent, but dead code). The duplicate block
  // has been removed.
  ValueIdSet jbbcOutputs = expr->getGroupAttr()->getCharacteristicOutputs();
  for (ValueId jbbcOutput = jbbcOutputs.init();
       jbbcOutputs.next(jbbcOutput);
       jbbcOutputs.advance(jbbcOutput))
  {
    addProducingJBBC(jbbcOutput, nodeAnalysis->getId());
  }

  return jbbc;
}
// create a new GBAnalysis
// Create a new GBAnalysis for the group-by expression 'gb', backed by
// a hidden NodeAnalysis whose only purpose is to give the group-by a
// unique CANodeId within a JBBSubset.
GBAnalysis * QueryAnalysis::newGBAnalysis(GroupByAgg* gb)
{
// Note that calling gb->getGroupAnalysis()->getNodeAnalysis() would
// give you a different NodeAnalysis object that the one we are associating
// here with GBAnalysis. This is OK. This should be the case.
NodeAnalysis* nodeAnalysis = newNodeAnalysis(gb);
// This NodeAnalysis has no association with a Group. It is associated
// with groupBy expression. This NodeAnalysis is actually a hidden one.
// It is only there to give an extra unique id for the presence of this
// group by in a jbbsubset.
GBAnalysis* gbAnalysis = new (heap_) GBAnalysis(nodeAnalysis->getId(), gb, heap_);
nodeAnalysis->setGBAnalysis(gbAnalysis);
gb->setGBAnalysis(gbAnalysis);
// add this nodeId to allGBs_
allGBs_.insert(nodeAnalysis->getId());
return gbAnalysis;
}
// Lookup a JBBSubsetAnalysis by CANodeIdSet. If none is available in cache,
// then create a new one and insert it in cache
// Look up the JBBSubsetAnalysis for the given subset in the cache,
// creating and caching a new one on a miss.
JBBSubsetAnalysis * QueryAnalysis::findJBBSubsetAnalysis(const JBBSubset & subset)
{
  // cache hit?
  JBBSubsetAnalysis* cached = jbbSubsetMap_.get(subset.getJBBCsAndGB());
  if (cached)
    return cached;

  // cache miss: build a new analysis and remember it under a
  // heap-allocated copy of the key
  JBBSubsetAnalysis* analysis = new (heap_) JBBSubsetAnalysis(subset, heap_);
  CANodeIdSet* key = new (heap_) CANodeIdSet(subset.getJBBCsAndGB(), heap_);
  CANodeIdSet* inserted = jbbSubsetMap_.insert(key, analysis);
  // Should work
  CMPASSERT(inserted != NULL);
  return analysis;
}
// Find the PredAnalysis the predicate with ValueId x. If none is created
// so far, then create a new one and insert it in array
// Return the PredAnalysis for the predicate with ValueId x, creating
// it lazily on first lookup.
PredAnalysis* QueryAnalysis::findPredAnalysis(ValueId x)
{
  // create the PredAnalysis on first encounter
  if (NOT allPreds_.contains(x))
  {
    predAnalysisArray_.insertAt(
      x, new (CmpCommon::statementHeap()) PredAnalysis(x));
    allPreds_ += x;
  }

  return predAnalysisArray_[x];
}
// -------------------------------------------------------------
// Method to initialize flags for global directives
// -------------------------------------------------------------
// -------------------------------------------------------------
// Set query-wide optimization flags that depend on the analyzed and
// stats-synthesized tree: the First-N optimization flag and the
// "don't surf the wave" flag for scalar aggregates over a scan.
// Must run after synthLogProp (reads synthesized cardinalities).
// -------------------------------------------------------------
void QueryAnalysis::initializeGlobalDirectives(RelExpr * root)
{
// Figure out if First N optimization is required
RelExpr * firstNExpr = NULL;
// The firstN info could be stamped on the root or inserted as a firstN node
// as child of the root
if (root->child(0)->getOperator() == REL_FIRST_N)
firstNExpr = root->child(0);
else if (root->getFirstNRows() >= 0)
firstNExpr = root;
if (firstNExpr)
{
// get the number of rows requested i.e. the FirstN rows
// Note: its different methods for RelExpr and FirstN classes
Int64 numRowsRequested;
if (firstNExpr->getOperator() == REL_FIRST_N)
numRowsRequested = ((FirstN*) firstNExpr)->getFirstNRows();
else
numRowsRequested = firstNExpr->getFirstNRows();
RelExpr * childOfFirstN = firstNExpr->child(0);
GroupAttributes * childOfFirstNGroupAttr = childOfFirstN->getGroupAttr();
// get the number of rows estimated to be returned
CostScalar numRowsProduced =
childOfFirstNGroupAttr->getResultCardinalityForEmptyInput();
// Optimize for First N only when the request is small in absolute
// terms (< 100 rows) or relative terms (< 1% of the estimate).
if((numRowsRequested < 100) ||
((double)numRowsRequested < (0.01 * numRowsProduced.getValue())))
{
optimizeForFirstNRows_ = TRUE;
}
// COMP_BOOL_78 acts as the master switch for this optimization:
// unless it is ON the flag is forced back to FALSE.
if (CmpCommon::getDefault(COMP_BOOL_78) != DF_ON)
{
optimizeForFirstNRows_ = FALSE;
}
}
// WaveFix Begin
// Applies to a scalar aggregate (group-by with no grouping columns)
// directly over a scan, unless disabled via COMP_BOOL_169.
if ((root->child(0)) &&
(root->child(0)->getOperatorType() == REL_GROUPBY) &&
(!((GroupByAgg *)(root->child(0).getPtr()))->groupExpr().entries()) &&
(root->child(0)->child(0)->getOperatorType() == REL_SCAN) &&
(CmpCommon::getDefault(COMP_BOOL_169) == DF_OFF))
{
// check if table is a single partitioned table
NABoolean isASinglePartitionedTable = FALSE;
Scan * tableScan = (Scan *)root->child(0)->child(0).getPtr();
TableAnalysis * tableAnalysis =
(TableAnalysis *)tableScan->getTableDesc()->getTableAnalysis();
const IndexDesc * tableIndexDesc =
tableAnalysis
->getTableDesc()
->getClusteringIndex();
const PartitioningFunction * tablePartFunc =
tableIndexDesc->getPartitioningFunction();
// A table counts as "single partition" when it has no partitioning
// function, one partition, or a partitioning key that is fully
// fixed to constants.
if ((!tablePartFunc)||
(tablePartFunc->getCountOfPartitions()==1) ||
(tablePartFunc->getPartitioningKey().
doAllExprsEvaluateToConstant(FALSE,TRUE)))
{
isASinglePartitionedTable = TRUE;
}
// check if there are any count distincts
NABoolean noCountDistincts = FALSE;
GroupByAgg * groupByNode = (GroupByAgg *)(root->child(0).getPtr());
ValueIdSet distinctColumns; // columns (exprs) referenced
// in non-redundant DISTINCT
// aggregates
CollIndex numNonMultiDistinctAggs = 0;
const ValueIdSet &aggrs = groupByNode->aggregateExpr();
for (ValueId x = aggrs.init(); aggrs.next(x); aggrs.advance(x))
{
Aggregate *agg = (Aggregate *) x.getItemExpr();
CMPASSERT(x.getItemExpr()->isAnAggregate());
if (agg->isDistinct())
{
// A DISTINCT aggregate is redundant when the grouping columns
// plus the distinct column are already unique in the child.
ValueIdSet uniqueSet = groupByNode->groupExpr();
uniqueSet += agg->getDistinctValueId();
if (NOT groupByNode->child(0).getGroupAttr()->isUnique(uniqueSet)) {
distinctColumns += agg->getDistinctValueId();
if((agg->getDistinctValueId() != agg->child(0)) ||
(agg->getArity() > 1)) {
numNonMultiDistinctAggs++;
}
}
}
}
if (!distinctColumns.entries())
noCountDistincts = TRUE;
// don't surf the wave if:
// * the table is NOT a single partitioned table, AND
// * there are no (non-redundant) DISTINCT aggregates, AND
// * the scanned table has no local predicates
if ((!isASinglePartitionedTable) &&
noCountDistincts &&
tableAnalysis->getLocalPreds().entries() == 0)
setDontSurfTheWave(TRUE);
}
// WaveFix End
}
// Run the trigger fixup pass over the tree. Returns TRUE when the
// fixup actually changed the tree, in which case logical properties
// are re-synthesized.
NABoolean QueryAnalysis::fixupTriggers(RelExpr* expr)
{
  NABoolean treeChanged = FALSE;

  // Access sets are a prerequisite for the trigger fixup itself.
  expr->calcAccessSets(CmpCommon::statementHeap());
  expr->fixupTriggers(treeChanged);

  // Estimated logical properties are cleared unconditionally, and
  // recomputed only when the tree was modified.
  expr->clearAllEstLogProp();
  if (treeChanged)
    expr->synthLogProp();

  return treeChanged;
}
// -------------------------------------------------------------
// The Main Analysis method
// Returns a pointer to the analyzed RelExpr tree, which may be
// a different tree than the one passed in as input. If noMVQR is
// TRUE (it is FALSE by default), MV query rewrite is suppressed
// regardless of the value of the MVQR_REWRITE_LEVEL CQD.
// -------------------------------------------------------------
// Main analysis driver. Runs, in order: trigger fixup, pilot analysis,
// JBB analysis, predicate/connectivity analysis, optional multi-join
// rewrite, per-node stats, CQS fixup, global directives, initial-plan
// analysis and (optionally) MV query rewrite. Returns the analyzed
// tree, which may differ from the input when MVQR rewrites it.
RelExpr* QueryAnalysis::analyzeThis(RelExpr* expr, NABoolean noMVQR)
{
NABoolean status = TRUE;
NABoolean monitor = CURRSTMT_OPTDEFAULTS->compileTimeMonitor();
// fixupTriggers() is called for its side effects; its return value
// (whether the tree changed) is not consulted here.
NABoolean treeFixed = fixupTriggers(expr);
// analyzedExpr is the pointer returned to the caller.
RelExpr* analyzedExpr = expr;
// First do pilot phase analysis
if (monitor) pilotPhaseMon_.enter();
status = expr->pilotAnalysis(this);
if (monitor) pilotPhaseMon_.exit();
// stop if incompatible node found
if (!status || !analysisON_)
{
// clear analysis
cleanup(expr);
// setInitialMemory for optimizer here
// doing this here because in cases
// when we don't come here this is
// called below, before the call to
// RelExpr::analysizeInitialPlan
// This ensures that we setInitialMemory
// whether analysis is ON or OFF.
// This is the OFF case.
CURRSTMT_OPTDEFAULTS->setInitialMemory();
return analyzedExpr;
}
// JBB setup: build JBBs, prime group analysis, derive JBBC deps.
if (monitor) jbbSetupMon_.enter();
expr->jbbAnalysis(this);
primeGroupAnalysisForSubtree(expr);
// analyze dependencies between JBBCs
analyzeJBBCDependencies(expr);
if (monitor) jbbSetupMon_.exit();
// TCG work here
if (monitor) tcgSetupMon_.enter();
expr->predAnalysis(this);
finishConnectivityAnalysis();
checkIfCompressedHistsViable();
if (monitor) tcgSetupMon_.exit();
// finishSynthEstLogProp has nothing to do with Query Analysis, but it
// needs to be done before multi-join transformation
expr->finishSynthEstLogProp();
ULng32 sizeOfLargestJBB = getSizeOfLargestJBB();
#ifndef NDEBUG
RelExpr * analyzerInput = expr;
#endif
// Do the multi join re-write now
// (only when the largest JBB is big enough and the user did not pin
// the join order).
if ((sizeOfLargestJBB >= multiJoinThreshold_) AND
!joinOrderByUser())
{
#ifndef NDEBUG
// get a copy of the RelExpr tree before it is converted to
// multiJoin. This is needed below for the MultiJoinTester.
// Want to invoke the MultiJoinTester after setting JBBInput.
if (sizeOfLargestJBB && (CmpCommon::getDefault(COMP_BOOL_8) == DF_ON))
{
analyzerInput = expr->copyRelExprTree(CmpCommon::statementHeap());
}
#endif
expr->convertToMultiJoinSubtree(this);
// Compute the query's multi-join complexity and its breakdown into
// n, n^2, n^3 and n^4 components.
double n, n2, n3, n4;
n= n2= n3= n4= 0;
double queryMJComplexity =
expr->calculateQueryMJComplexity(n,n2,n3,n4);
// complexity should be atleast 1
if (queryMJComplexity < 1)
queryMJComplexity=n=n2=n3=n4=1;
queryComplexityVector_ = new(CmpCommon::statementHeap())
QueryComplexityVector(CmpCommon::statementHeap());
queryComplexityVector_->setNComplexity(n);
queryComplexityVector_->setN2Complexity(n2);
queryComplexityVector_->setN3Complexity(n3);
queryComplexityVector_->setN4Complexity(n4);
queryComplexityVector_->setExhaustiveComplexity(queryMJComplexity);
CURRSTMT_OPTDEFAULTS->setQueryMJComplexity(queryMJComplexity);
// Re-synthesize logical properties on the rewritten tree.
expr->clearLogExprForSynthDuringAnalysis();
expr->synthLogProp();
multiJoinsUsed_ = TRUE;
}
expr->setJBBInput((*GLOBAL_EMPTY_INPUT_LOGPROP));
if ((expr->getOperatorType() == REL_ROOT) AND
(CmpCommon::context()->showQueryStats()))
setHistogramsToDisplay((RelRoot*) expr);
#ifndef NDEBUG
// TESTING
if (sizeOfLargestJBB && (CmpCommon::getDefault(COMP_BOOL_8) == DF_ON))
{
MultiJoinTester::Test1(analyzerInput, expr);
expr->clearLogExprForSynthDuringAnalysis();
expr->synthLogProp();
expr->setJBBInput((*GLOBAL_EMPTY_INPUT_LOGPROP));
}
#endif
if (expr->getOperatorType() == REL_ROOT)
{
((RelRoot *)expr)->setHasMandatoryXP(hasMandatoryXP());
}
// ASM and QG work
// This should also be disabled if cqsRewrite succeed
if (appStatMan_ && CmpCommon::getDefault(ASM_PRECOMPUTE) == DF_ON)
{
if (monitor) asmPrecompMon_.enter();
initializeASM();
if (monitor) asmPrecompMon_.exit();
}
// Compute basic stats for the nodes
computeStatsForNodes();
// Compute the set of tables joined to the largest table
if (CmpCommon::getDefault(COMP_BOOL_119) == DF_ON)
computeTablesJoinedToLargestTable();
// Now we do the CQS Fixup if applicable
if (expr->getOperatorType() == REL_ROOT)
{
cqsRewrite(expr);
}
// Initialize the global directive flags
// Note:
// this should be after synthLogProp since
// some flags will be set based on the statistics
// computations that are done in synthLogProp
initializeGlobalDirectives(expr);
// setInitialMemory for optimizer here
// doing this because some LSR work is
// done in analyzeInitialPlan
CURRSTMT_OPTDEFAULTS->setInitialMemory();
// this should be the last active call
// i.e. last call that does anything to
// the expression
if (CmpCommon::getDefault(COMP_BOOL_115) == DF_OFF)
expr->analyzeInitialPlan();
// display (or dump to file) JBB connectivity graph if requested
graphDisplay();
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << getText() << endl;
}
#define MVQR
#ifdef MVQR
if (!noMVQR && CmpCommon::getDefaultLong(MVQR_REWRITE_LEVEL) > 0)
analyzedExpr = handleMvQueryRewrite(analyzedExpr);
// if dontSurfTheWave() is set previously and there are any MVQR matches
// then unset dont surf the wave, as we do not want the priority of the
// SortGrby plan to be increased in a way that avoids checking the costing
// between the SortGrby plan and the matching MV plan.
if ((dontSurfTheWave()) &&
(expr->child(0)) &&
(expr->child(0)->getOperatorType() == REL_GROUPBY) &&
(((GroupByAgg *)(expr->child(0).getPtr()))->getJBBSubsetAnalysis() &&
((GroupByAgg *)(expr->child(0).getPtr()))->getJBBSubsetAnalysis()
->getMatchingMVs().entries()))
setDontSurfTheWave(FALSE);
#endif
return analyzedExpr;
}
// Compute and remember the output histograms (statsToDisplay_) that a
// later showQueryStats() call will display, honoring the
// HIST_ROOT_NODE CQD: an empty setting means "whole query", a single
// CANodeId means that node, and a comma/space-separated list of ids
// means a JBB subset (all ids must be JBBCs of the same JBB).
void QueryAnalysis::setHistogramsToDisplay(RelRoot * root)
{
CANodeIdSet nodeSetForHistEst;
const char * nodeSetStr = ActiveSchemaDB()->getDefaults().getValue(HIST_ROOT_NODE);
size_t nodeSetStrLen = strlen(nodeSetStr);
NABoolean validNodeSet = TRUE;
if (nodeSetStr && nodeSetStrLen)
{
char * nodeSetStrForTokenization = NULL;
// A non-empty CQD must be validated before it is trusted.
validNodeSet = FALSE;
if (nodeSetStr)
{
if (nodeSetStrLen)
{
// strtok modifies its input, so tokenize a heap copy of the CQD value.
nodeSetStrForTokenization = new (CmpCommon::statementHeap()) char[nodeSetStrLen+1];
strcpy(nodeSetStrForTokenization, nodeSetStr);
char * token;
token = strtok (nodeSetStrForTokenization," ,");
while (token != NULL)
{
nodeSetForHistEst.insert(atoi(token));
token = strtok (NULL, " ,");
}
}
}
if(nodeSetForHistEst.entries())
{
if(nodeSetForHistEst.entries() == 1)
{
// can be:
// 0. any jbbc
// 1. scan
// 2. group by
// 3. full outer join
// 4. tsj
CANodeId node = nodeSetForHistEst.getFirst();
if(node.getNodeAnalysis())
{
RelExpr * nodeExpr = node.getNodeAnalysis()->getModifiedExpr();
if(node.getNodeAnalysis()->getJBBC() ||
node.getNodeAnalysis()->getTableAnalysis() ||
node.getNodeAnalysis()->getGBAnalysis() ||
(nodeExpr->isAnyJoin() &&
(((Join*) nodeExpr)->isFullOuterJoin() ||
((Join*) nodeExpr)->isTSJ())))
validNodeSet = TRUE;
}
}
else{
validNodeSet = TRUE;
// has to be a join
// check if all nodes are jbbcs in the same jbb
// NOTE: CollIndex is unsigned, so -1 here acts as a "not yet
// set" sentinel via wrap-around.
CollIndex jbbId = -1;
for(CANodeId node = nodeSetForHistEst.init();
nodeSetForHistEst.next(node);
nodeSetForHistEst.advance(node))
{
// each node should be jbbc
if (!node.getNodeAnalysis() || !node.getNodeAnalysis()->getJBBC())
{
validNodeSet = FALSE;
break;
}
// all jbbcs should belong to the same JBB
if((jbbId != -1) && (node.getNodeAnalysis()->getJBBC()->getJBB()->getJBBId()!=jbbId))
{
validNodeSet = FALSE;
break;
}
jbbId = node.getNodeAnalysis()->getJBBC()->getJBB()->getJBBId();
}
if(validNodeSet)
validNodeSet = nodeSetForHistEst.legal();
}
}
}
if (validNodeSet)
{
EstLogPropSharedPtr inputLP = (*GLOBAL_EMPTY_INPUT_LOGPROP);
if (nodeSetForHistEst.entries() == 0)
{
// Empty set: show the histograms of the whole query (root's child).
statsToDisplay_ = root->child(0)->getGroupAttr()->outputLogProp(inputLP);
}
else if (nodeSetForHistEst.entries() == 1)
{
// Single node: use that node's own JBB input, falling back to the
// empty input logical properties.
inputLP = nodeSetForHistEst.getFirst().getNodeAnalysis()->getJBBInput();
if(inputLP.get() == NULL) inputLP = (*GLOBAL_EMPTY_INPUT_LOGPROP);
statsToDisplay_ =
nodeSetForHistEst.getFirst().getNodeAnalysis()->getModifiedExpr()->getGroupAttr()->outputLogProp(inputLP);
}
else{
JBBSubset * jbbSubset = nodeSetForHistEst.jbbcsToJBBSubset();
inputLP = jbbSubset->getJBB()->getInputEstLP();
//create and prime multijoin
//use multijoin to get EstLogProp
MultiJoin* mj = new (CmpCommon::statementHeap())
MultiJoin((*jbbSubset), CmpCommon::statementHeap());
// Set the children based on the Query Analysis jbbcExprMap_
mj->setChildrenFromOrigExprs(this);
mj->setGroupAttr(new (CmpCommon::statementHeap()) GroupAttributes());
mj->synthLogProp();
statsToDisplay_ = mj->getGroupAttr()->outputLogProp(inputLP);
}
// Remember the select list so display can label the columns.
selectListCols_ = root->compExpr();
}
}
// method to show a query's histograms
// Display the histograms captured by setHistogramsToDisplay() for this
// query's select-list columns, labeled with the query text. No-op when
// no stats were captured.
// NOTE(review): sprintf does not bound-check buf; the caller must pass
// a buffer large enough for the label plus the full query text —
// consider snprintf once the buffer contract at the call sites is
// confirmed.
void QueryAnalysis::showQueryStats(const char *qText, CollHeap *c, char *buf)
{
if (statsToDisplay_)
{
sprintf(buf, "Histograms for query %s", qText);
statsToDisplay_->getColStats().showQueryStats(c, buf, selectListCols_);
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Drive MV query rewrite for the analyzed tree: during CREATE MV build
// the MV descriptor; otherwise (non-DDL) attempt the rewrite itself,
// possibly returning a different tree. Unsupported constructs are
// reported via a logged warning.
RelExpr* QueryAnalysis::handleMvQueryRewrite(RelExpr* expr)
{
NAString warningMessage = "";
// We don't handle olap or sequence functions yet,
if (expr->containsNode(REL_SEQUENCE))
{
warningMessage = "SEQUENCE is not supported.";
}
else if (isMvCreation())
{
// We are in a CREATE MV operation.
// Need to create an MV descriptor,
mvQueryRewriteHandler_->createMvDescriptor(this, expr, warningMessage);
}
else
{
// Don't attempt rewrite of DDL statements, and don't warn about it.
if (expr->getRETDesc()->getBindWA()->inDDL())
return expr;
// Do the work here. This call may replace the tree.
expr = mvQueryRewriteHandler_->handleMvQueryRewrite(this, expr, warningMessage);
}
// Anything to warn about? Log it and stash it on the handler.
if (warningMessage != "")
{
QRLogger::log(CAT_SQL_COMP_QR_HANDLER, LL_WARN, warningMessage);
mvQueryRewriteHandler_->getWarningMessage() = warningMessage;
}
return expr;
} // handleMvQueryRewrite()
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Derive the dependency relationships between the children (JBBCs) of
// every JBB, based on the join predicates collected from the tree.
void QueryAnalysis::analyzeJBBCDependencies(RelExpr* expr)
{
  // Collect the join predicates that impose dependencies between JBBCs.
  ValueIdSet predsWithDependencies;
  expr->jbbJoinDependencyAnalysis(predsWithDependencies);

  const ARRAY(JBB*) jBBs = getJBBs();

  // Pass 1: dependencies between each JBB's children, driven by the
  // predicates collected above.
  for (UInt32 jbbIdx = 0; jbbIdx < jBBs.entries(); jbbIdx++)
    jBBs[jbbIdx]->analyzeChildrenDependencies();

  // Pass 2: filter predicates for left joins.
  for (UInt32 jbbIdx = 0; jbbIdx < jBBs.entries(); jbbIdx++)
    jBBs[jbbIdx]->computeLeftJoinFilterPreds();

  // Pass 3: dependencies between children subgraphs.
  for (UInt32 jbbIdx = 0; jbbIdx < jBBs.entries(); jbbIdx++)
    jBBs[jbbIdx]->computeChildrenSubGraphDependencies();
}
// Returns TRUE when any JBB in the query requires a mandatory
// cross-product (XP). The answer is computed once and cached in
// hasMandatoryXP_ / hasMandatoryXPComputed_.
NABoolean QueryAnalysis::hasMandatoryXP()
{
  // Return the cached answer if it was computed before.
  if (hasMandatoryXPComputed_)
    return hasMandatoryXP_;

  hasMandatoryXP_ = FALSE;
  const ARRAY(JBB*) jBBs = getJBBs();
  // Stop at the first JBB with a mandatory XP; the original scanned
  // all JBBs even after the answer was known.
  for (UInt32 i = 0; i < jBBs.entries() && NOT hasMandatoryXP_; i++)
  {
    if (jBBs[i]->hasMandatoryXP())
      hasMandatoryXP_ = TRUE;
  }

  hasMandatoryXPComputed_ = TRUE;
  return hasMandatoryXP_;
}
// Among the tables connected (via predicates) to the leading clustering
// key column of the largest table, find the one with the highest
// cardinality reduction factor (base cardinality / post-local-pred
// cardinality) and record it in joinedToLargestTable_.
void QueryAnalysis::computeTablesJoinedToLargestTable()
{
// Nothing to do when no largest table was identified.
if(!largestTable_)
return;
ValueIdList largestTableCK =
largestTable_
->getTableDesc()
->getClusteringIndex()
->getIndexKey();
if (!largestTableCK.entries())
return;
// get a handle to the key prefix column
ValueId keyColumn = largestTableCK[0];
//get a handle to the column analysis for this column
ColAnalysis * keyColAnalysis = keyColumn.colAnalysis();
if (!keyColAnalysis)
return;
CANodeIdSet tablesConnectedViaKeyCol =
keyColAnalysis->getConnectedTables();
if (!tablesConnectedViaKeyCol.entries())
return;
// mostReducedTable stays NULL_CA_ID until the first candidate is seen.
CANodeId mostReducedTable;
CostScalar highestRedFactor = -1;
for(CANodeId currentTable = tablesConnectedViaKeyCol.init();
tablesConnectedViaKeyCol.next(currentTable);
tablesConnectedViaKeyCol.advance(currentTable))
{
// Only consider nodes that are actual tables.
if(!currentTable.getNodeAnalysis()->getTableAnalysis())
continue;
if(mostReducedTable==NULL_CA_ID)
{
// First table candidate: it is the current best by default.
mostReducedTable = currentTable;
highestRedFactor =
(mostReducedTable.
getNodeAnalysis()->
getTableAnalysis()->
getCardinalityOfBaseTable())/
(mostReducedTable.
getNodeAnalysis()->
getCardinality());
continue;
}
// Reduction factor = base-table cardinality / reduced cardinality;
// a larger value means local predicates filter the table more.
CostScalar currentTableRedFactor =
(currentTable.
getNodeAnalysis()->
getTableAnalysis()->
getCardinalityOfBaseTable())/
(currentTable.
getNodeAnalysis()->
getCardinality());
if (currentTableRedFactor > highestRedFactor)
{
highestRedFactor = currentTableRedFactor;
mostReducedTable = currentTable;
}
}
if(mostReducedTable != NULL_CA_ID)
joinedToLargestTable_.insert(mostReducedTable);
}
// Abort/clean up the analysis: turn the analysis machinery off and
// strip any analysis state already attached to the expression tree.
// Called when pilot analysis fails or analysis is disabled.
void QueryAnalysis::cleanup(RelExpr* expr)
{
  // (Removed an unused local that called the pure accessor
  // getSizeOfLargestJBB() and discarded the result.)

  // set Analysis flag OFF
  analysisON_ = FALSE;
  // set MultiJoin Rewrite flag OFF
  multiJoinsUsed_ = FALSE;
  // disable ASM by setting appStatMan to NULL
  appStatMan_ = NULL;
  // clear analysis data stored in GroupAnalysis of the tree's expressions
  clearAnalysis(expr);
  // do finishAnalysis before exiting Query Analysis phase.
  expr->finishSynthEstLogProp();
  return;
}
// Apply a CONTROL QUERY SHAPE directive (if any) to the analyzed tree.
// The shape is forced only when multi-joins were created (forceCQS
// expects them) and the required shape is a real tree, not a cut-op.
void QueryAnalysis::cqsRewrite(RelExpr* expr)
{
  RelRoot *root = (RelRoot *) expr;
  CURRENTSTMT->clearCqsWA();

  RelExpr * requiredShape = root->getReqdShape();

  if (multiJoinsUsed_ AND          // cqsRewrite expects multiJoins
      requiredShape AND
      NOT requiredShape->isCutOp())
  {
    root->forceCQS(requiredShape);
    root->synthLogProp();
  }
}
// Recursively clear the GroupAnalysis data attached to this expression
// and everything beneath it (pre-order: node first, then children).
void QueryAnalysis::clearAnalysis(RelExpr* expr)
{
  expr->getGroupAttr()->clearGroupAnalysis();

  Int32 numChildren = expr->getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; childIdx++)
    clearAnalysis(expr->child(childIdx).getPtr());
}
// Prime the GroupAnalysis for the whole subtree, bottom-up: every
// child is primed before its parent.
void QueryAnalysis::primeGroupAnalysisForSubtree(RelExpr* expr)
{
  Int32 numChildren = expr->getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; childIdx++)
    primeGroupAnalysisForSubtree(expr->child(childIdx).getPtr());

  expr->primeGroupAnalysis();
}
void QueryAnalysis::finishConnectivityAnalysis()
{
CANodeIdSet tables = tCGraph_->tables_;
CANodeId table;
for (table = tables.init();
tables.next(table);
tables.advance(table))
{
table.getNodeAnalysis()->getTableAnalysis()->finishAnalysis();
}
return;
}
void QueryAnalysis::checkIfCompressedHistsViable()
{
CANodeIdSet tables = tCGraph_->tables_;
CANodeId table;
for (table = tables.init();
tables.next(table);
tables.advance(table))
{
table.getNodeAnalysis()->getTableAnalysis()->checkIfCompressedHistsViable();
}
return;
}
// Find JBB with most JBBCs
// Scan the (possibly sparse) JBB array and return the JBB with the
// most JBBCs; NULL when there are no JBBs.
JBB* QueryAnalysis::getLargestJBB()
{
  JBB* best = NULL;
  CollIndex bestSize = 0;

  // The array can have unused slots; stop once all live entries are seen.
  CollIndex entriesLeft = jbbArray_.entries();
  for (CollIndex idx = 0; entriesLeft > 0; idx++)
  {
    if (NOT jbbArray_.used(idx))
      continue;
    entriesLeft--;

    CollIndex jbbcCount = jbbArray_[idx]->getJBBCs().entries();
    if (jbbcCount > bestSize)
    {
      best = jbbArray_[idx];
      bestSize = jbbcCount;
    }
  }
  return best;
}
// Size (in JBBCs) of the largest JBB; 0 when the query has no JBBs.
ULng32 QueryAnalysis::getSizeOfLargestJBB()
{
  JBB* largest = getLargestJBB();
  return largest ? largest->getJBBCs().entries() : 0;
}
// This method is called only after ASM initialization
// It computes some basic stats for nodes and tables and caches them.
// Compute and cache basic stats for every NodeAnalysis in the array,
// skipping the reserved NULL_CA_ID slot (index zero).
void QueryAnalysis::computeStatsForNodes()
{
CollIndex i = 0;
CollIndex remainingNodes = nodeAnalysisArray_.entries();
for (i = 0; remainingNodes > 0; i++)
{
// NOTE(review): remainingNodes is decremented before the used(i)
// check and the CMPASSERT claims every slot is used — i.e. this loop
// assumes the array is dense, unlike the sparse-array loops used for
// jbbArray_ elsewhere in this file. Confirm nodeAnalysisArray_ is
// never sparse.
remainingNodes--;
CMPASSERT(nodeAnalysisArray_.used(i));
if (nodeAnalysisArray_.used(i) && i != NULL_CA_ID) // NULL_CA_ID is zero
{
nodeAnalysisArray_[i]->computeStats();
}
}
}
// For the graph display by the dotty tool
// Dump the JBB connectivity graph in Graphviz "dot" format, controlled
// by the DISPLAY_DATA_FLOW_GRAPH CQD: OFF/empty disables, ON writes to
// a hard-coded default path, any other value is used as the file name.
void QueryAnalysis::graphDisplay() const
{
// See if there is something to display first
if(jbbArray_.entries() == 0)
return;
NAString fileName = "";
const char* fname = ActiveSchemaDB()->getDefaults().
getValue(DISPLAY_DATA_FLOW_GRAPH);
// NOTE(review): fname is dereferenced without a NULL check; this
// assumes getValue() always returns a (possibly empty) string —
// confirm that contract.
if(!(*fname) || strcasecmp(fname,"OFF") == 0)
{
// Display graph is OFF. bye bye
return;
}
if(*fname && strcasecmp(fname,"ON") == 0)
{
// The default file name is used here
fileName = "C:\\ATT\\Graphviz\\graphs\\f.dot";
}
else
{
// Use name specified in CQD
fileName = fname;
}
// Build the whole dot document in memory, then write it out in one shot.
NAString result = "graph G {\n";
ofstream fileout(fileName, ios::out);
result += "fontsize=20 label=\"\\nQuery Connectivity Analysis\"\n";
result += "edge [len=2 color=blue fontcolor=blue];\n";
CollIndex i = 0;
// jbbArray_ may be sparse: visit slots until all live entries are seen.
CollIndex remainingJBBs = jbbArray_.entries();
for (i = 0; remainingJBBs > 0; i++)
{
if (jbbArray_.used(i))
{
result += jbbArray_[i]->graphDisplay(this) + "\n";
remainingJBBs--;
}
}
result += "}\n";
fileout<<result;
return;
}
// Render a human-readable summary of the analysis: all CA nodes
// (excluding the reserved NULL_CA_ID slot), the JBBCs, and every JBB.
const NAString QueryAnalysis::getText() const
{
NAString result(heap_);
result += "-----------------------------------------------------\n";
result += "QueryAnalysis:\n\n";
CollIndex i = 0;
// entries()-1 excludes the NULL_CA_ID slot from the reported count.
result += "Have "+ istring(nodeAnalysisArray_.entries()-1) + " CA Nodes:\n\n";
// The arrays may be sparse: visit slots until all live entries are seen.
CollIndex remainingNodes = nodeAnalysisArray_.entries();
for (i = 0; remainingNodes > 0; i++)
{
if (nodeAnalysisArray_.used(i))
{
if (i != NULL_CA_ID) // which is zero
result += nodeAnalysisArray_[i]->getText() + "\n";
remainingNodes--;
}
}
result += "Have "+ istring(allJBBCs_.entries()) + " JBBCs:\n";
result += allJBBCs_.getText()+"\n";
result += "Have "+ istring(jbbArray_.entries()) + " JBBs:\n";
CollIndex remainingJBBs = jbbArray_.entries();
for (i = 0; remainingJBBs > 0; i++)
{
if (jbbArray_.used(i))
{
result += jbbArray_[i]->getText() + "\n";
remainingJBBs--;
}
}
result += "\n";
result += "-----------------------------------------------------\n";
return result;
}
// Debugger convenience: emit the analysis text via print() with its
// default arguments.
void QueryAnalysis::display() const
{
print();
}
// Print the analysis text to the given stream. The prefix/suffix
// parameters are accepted for interface consistency with other print()
// methods but are not used here.
void QueryAnalysis::print (FILE *f,
const char * prefix,
const char * suffix) const
{
  // Use an explicit "%s" format instead of passing the analysis text
  // as the format string: any '%' inside the text (e.g. in predicate
  // or object names) would otherwise be treated as a conversion
  // specifier — undefined behavior.
  fprintf (f, "%s", getText().data());
}
// -----------------------------------------------------------------------
// pilotAnalysis() and jbbAnalysis() and other RelExpr methods
// -----------------------------------------------------------------------
// Pilot analysis pass: veto constructs the analyzer cannot handle,
// then recurse into the children. Returns FALSE when the tree contains
// something analysis does not support.
NABoolean RelExpr::pilotAnalysis(QueryAnalysis* qa)
{
  // Compound statements are not supported for now; little gain is
  // expected since CS statements are usually small.
  if (getOperator().match(REL_COMPOUND_STMT))
    return FALSE;

  // Streams require TSJs, and the MultiJoin rules do not respect that
  // requirement yet.
  if (getGroupAttr()->isStream())
    return FALSE;

  // The subtree passes only if every child passes. The short-circuit
  // AND means recursion stops at the first failing child.
  NABoolean ok = TRUE;
  Int32 numChildren = getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; childIdx++)
    ok = ok AND child(childIdx)->pilotAnalysis(qa);
  return ok;
}
// GroupBy pilot analysis: run the generic checks (and child recursion)
// first, then detect a group-by that sits on top of a JBB and give it
// a GBAnalysis of its own.
NABoolean GroupByAgg::pilotAnalysis(QueryAnalysis* qa)
{
  // Generic RelExpr checks, including recursion into the child.
  if (NOT RelExpr::pilotAnalysis(qa))
    return FALSE;

  RelExpr* gbChild = this->child(0);

  // Case 1: the child is a join that can be a JBB member.
  NABoolean overJoinJBB =
    (gbChild->getOperator().match(REL_ANY_JOIN) &&
     ((Join*)gbChild)->canBePartOfJBB());

  // Case 2 (MVQR enabled only): the child is a single scan and the
  // grouping columns are not already unique there, i.e. the group by
  // cannot simply be eliminated.
  NABoolean overScanJBB =
    (gbChild->getOperator().match(REL_SCAN) &&
     (CmpCommon::getDefaultLong(MVQR_REWRITE_LEVEL) > 0) &&
     !(gbChild->getGroupAttr()->isUnique(groupExpr())));

  if (overJoinJBB || overScanJBB)
  {
    // This GB heads a JBB: register a GBAnalysis (which assigns it a
    // special CANodeId). For the single-scan case the scan itself
    // becomes a JBBC first.
    if (overScanJBB)
      qa->newJBBC(NULL, gbChild);
    qa->newGBAnalysis(this);
  }

  return TRUE;
}
// Scan pilot analysis: a scan is analyzable if a TableAnalysis can be
// built for it. Describe is rejected outright: it derives from Scan
// but carries a junk TableDesc (to be moved into its own
// Describe::pilotAnalysis later).
NABoolean Scan::pilotAnalysis(QueryAnalysis* qa)
{
  if (getOperator().match(REL_DESCRIBE))
    return FALSE;

  return qa->newTableAnalysis(this) ? TRUE : FALSE;
}
// A table-valued function is analyzable only when a TableAnalysis can
// be created for it.
NABoolean TableValuedFunction::pilotAnalysis(QueryAnalysis* qa)
{
  return qa->newTableAnalysis(this) ? TRUE : FALSE;
}
// Routine pilot analysis: generic checks (and child recursion) first,
// then the routine itself must yield a RoutineAnalysis.
NABoolean RelRoutine::pilotAnalysis(QueryAnalysis* qa)
{
  if (NOT RelExpr::pilotAnalysis(qa))
    return FALSE;

  return qa->newRoutineAnalysis(this) ? TRUE : FALSE;
}
// Join pilot analysis: register JBBCs for the join's children when the
// join can be part of a JBB, fail the analysis for spoiler joins, and
// recurse into both children.
NABoolean Join::pilotAnalysis(QueryAnalysis* qa)
{
NABoolean status = TRUE;
// set status to FALSE if this
// join cannot be part of JBB
if (isASpoilerJoin())
status = FALSE;
// TSJs, semi joins and outer joins make compressed histograms unusable.
if ( qa->isCompressedHistsViable() &&
(isTSJ() || isSemiJoin() || isOuterJoin() ) )
qa->disableCompressedHistsViable();
if(canBePartOfJBB())
{
// the right child is always a JBBC
qa->newJBBC(this, child(1).getPtr());
// the left child is a JBBC only if it is:
// * not one of the following joins
// * inner
// * left join
// * semi
// * anti-semi
// * full outer join becomes a JBBC
// * TSJ becomes a JBBC but by default it is a spoiler
if ((!child(0)->getOperator().match(REL_ANY_JOIN))||
(!((Join *)child(0).getPtr())->canBePartOfJBB()))
{
// A join left child that cannot itself be part of the JBB must be a
// full outer join or TSJ; tag the JBBC accordingly.
NABoolean isFullOuterJoinOrTSJJBBC = FALSE;
if (child(0)->getOperator().match(REL_ANY_JOIN))
isFullOuterJoinOrTSJJBBC = TRUE;
qa->newJBBC(NULL, child(0).getPtr(),isFullOuterJoinOrTSJJBBC);
}
}
else
qa->setSkippedSomeJoins();
// now pass call to children; the short-circuit AND stops the
// recursion once status is FALSE.
for (Lng32 i = 0; i < 2; i++)
status = status AND child(i)->pilotAnalysis(qa);
return status;
}
// Insert pilot analysis: an embedded insert, or one producing
// characteristic outputs, carries predicates and therefore needs a
// TableAnalysis so its column analysis is set up properly.
NABoolean Insert::pilotAnalysis(QueryAnalysis* qa)
{
  if (getGroupAttr()->isEmbeddedInsert() ||
      (getGroupAttr()->getCharacteristicOutputs().entries() > 0))
  {
    if (NOT qa->newTableAnalysis(this))
      return FALSE;
  }

  // Recurse: the insert passes only if every child passes (the
  // short-circuit AND stops recursion on the first failure).
  NABoolean ok = TRUE;
  Int32 numChildren = getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; childIdx++)
    ok = ok AND child(childIdx)->pilotAnalysis(qa);
  return ok;
}
// this method will be used for update and delete (both are derived from
// generic update. Insert (also derived from generic update) has its own
// pilotAnalysis
// Pilot analysis for update and delete (both derive from
// GenericUpdate; Insert has its own pilotAnalysis). May veto the
// analysis entirely, otherwise ensures a TableAnalysis exists and
// recurses into the children.
NABoolean GenericUpdate::pilotAnalysis(QueryAnalysis* qa)
{
NABoolean status = TRUE;
// Update operator is a problem for TCG because it generates
// multiple ValueIds for the same BaseColumn. We will delay
// handling this spoiler for now. This check will be later put
// in Update::pilotAnalysis(), but this is a fast kludge xxx.
// Future fix for this whole Update issue is either to change
// type of these duplicate base columns to something else. Or
// to rely on base column list in analyzer to test.
// Same thing for Delete operator.
if (((CmpCommon::getDefault(COMP_BOOL_217) == DF_ON) &&
((getOperator().match(REL_UNARY_UPDATE)) OR
(getOperator().match(REL_UNARY_DELETE)))) OR
(getGroupAttr()->isEmbeddedUpdateOrDelete()))
{
return FALSE;
}
// Ensure the column analysis is properly set up
if (((getOperator().match(REL_UNARY_UPDATE)) OR
(getOperator().match(REL_UNARY_DELETE)) OR
(getGroupAttr()->getCharacteristicOutputs().entries() > 0))
&&
(!qa->newTableAnalysis(this)))
return FALSE;
// now pass it to children; the short-circuit AND stops the
// recursion once status is FALSE.
Int32 arity = getArity();
for (Lng32 i = 0; i < arity; i++)
status = status AND child(i)->pilotAnalysis(qa);
return status;
}
// Default JBB analysis: this node starts no JBB; simply visit the
// children.
void RelExpr::jbbAnalysis(QueryAnalysis* qa)
{
  Int32 numChildren = getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; childIdx++)
    child(childIdx)->jbbAnalysis(qa);
}
// GroupBy JBB analysis. Uses the same "is this GB on top of a JBB"
// test as GroupByAgg::pilotAnalysis (the two predicates must stay in
// sync). When it is, a JBB is created for the GB and the analysis
// walks down the JBB's left join spine, recursing into each JBBC.
void GroupByAgg::jbbAnalysis(QueryAnalysis* qa)
{
RelExpr* child = this->child(0);
NABoolean joinThatCanBePartOfJBB =
(child->getOperator().match(REL_ANY_JOIN) && ((Join*)child)->canBePartOfJBB());
NABoolean singleScanThatCanBePartOfJBB =
((child->getOperator().match(REL_SCAN)) && (CmpCommon::getDefaultLong(MVQR_REWRITE_LEVEL) > 0) &&
!(child->getGroupAttr()->isUnique(groupExpr()))); // group by can be eliminated
if (!joinThatCanBePartOfJBB && !singleScanThatCanBePartOfJBB)
{
// not on top of JBB. do it like other RelExprs.
RelExpr::jbbAnalysis(qa);
return;
}
// This GB is on top of a JBB. create a JBB object.
JBB* jbb = qa->newJBB(this);
// analyse the JBB and children
jbb->analyze(this);
// recursively pass call to children exprs of the entire JBB:
// walk the left spine of member joins, visiting each right child...
RelExpr* expr = child;
while (expr->getOperator().match(REL_ANY_JOIN) &&
((Join*)expr)->canBePartOfJBB())
{
expr->child(1)->jbbAnalysis(qa);
expr = expr->child(0);
}
// ... and finally the left most JBBC
expr->jbbAnalysis(qa);
// Can do other GB Analysis stuff here like R1 and R2 computations.
return;
}
// Join JBB analysis. A join reached here from the parent traversal is
// the topmost join of its JBB (member joins below are consumed by the
// spine walk); joins that cannot belong to a JBB behave like any other
// RelExpr.
void Join::jbbAnalysis(QueryAnalysis* qa)
{
  if (NOT canBePartOfJBB())
  {
    RelExpr::jbbAnalysis(qa);
    return;
  }

  // Top join of a JBB: create and analyze the JBB.
  JBB* jbb = qa->newJBB(this);
  jbb->analyze(this);

  // Walk down the left spine of member joins, recursing into each
  // right child (a JBBC) along the way...
  RelExpr* spine = this;
  while (spine->getOperator().match(REL_ANY_JOIN) &&
         ((Join*)spine)->canBePartOfJBB())
  {
    spine->child(1)->jbbAnalysis(qa);
    spine = spine->child(0);
  }

  // ... and finally into the leftmost JBBC.
  spine->jbbAnalysis(qa);
}
// Default dependency analysis: this node contributes no join
// predicates; simply visit the children.
void RelExpr::jbbJoinDependencyAnalysis(ValueIdSet & predsWithDependencies)
{
  Int32 numChildren = getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; childIdx++)
    child(childIdx)->jbbJoinDependencyAnalysis(predsWithDependencies);
}
// Join dependency analysis: accumulate this join's predicates into the
// dependency set on the way down, then (on the way up) stamp the
// dependency predicates onto the JBBCs of both children.
void Join::jbbJoinDependencyAnalysis(ValueIdSet & predsWithDependencies)
{
// Add this join's predicates before recursing so descendants see them.
if(canBePartOfJBB())
{
predsWithDependencies += getJoinPred();
}
RelExpr::jbbJoinDependencyAnalysis(predsWithDependencies);
if(canBePartOfJBB())
{
ValueIdSet emptySet;
JBBC * rightChildJBBC =
child(1)->
getGroupAnalysis()->
getNodeAnalysis()->
getJBBC();
if(rightChildJBBC)
{
// For non-inner/semi/TSJ joins the right child's own join preds
// are also dependency-bearing; for plain inner joins they are not.
if(!isInnerNonSemiNonTSJJoin())
rightChildJBBC->setPredsWithDependencies(predsWithDependencies,
getJoinPred());
else
rightChildJBBC->setPredsWithDependencies(predsWithDependencies,
emptySet);
}
// The left child may not have a NodeAnalysis (e.g. when it is a
// member join of the same JBB), so check before dereferencing.
NodeAnalysis * leftChildNodeAnalysis =
child(0)->
getGroupAnalysis()->
getNodeAnalysis();
JBBC * leftChildJBBC = NULL;
if (leftChildNodeAnalysis)
leftChildJBBC =
leftChildNodeAnalysis->getJBBC();
if(leftChildJBBC){
leftChildJBBC->setPredsWithDependencies(predsWithDependencies, emptySet);
}
}
}
// Default predicate analysis: run column analysis over this node's
// selection predicates, then visit the children.
void RelExpr::predAnalysis(QueryAnalysis* qa)
{
  ValueIdSet preds = getSelectionPred();
  preds.columnAnalysis(qa);

  Int32 numChildren = getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; childIdx++)
    child(childIdx)->predAnalysis(qa);
}
// Join predicate analysis: the join predicates take part in column
// analysis alongside the selection predicates, and predicates on
// semi/left joins are flagged for special treatment downstream.
void Join::predAnalysis(QueryAnalysis* qa)
{
  ValueIdSet preds = getSelectionPred();
  preds += joinPred();

  NABoolean predsOnSemiOrLeftJoin = !isInnerNonSemiJoin();
  preds.columnAnalysis(qa, predsOnSemiOrLeftJoin);

  Int32 numChildren = getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; childIdx++)
    child(childIdx)->predAnalysis(qa);
}
// returns TRUE if this join is a "spoiler", i.e. it prevents the
// enclosing joins from forming a JBB. Which join flavors spoil is
// controlled per-flavor by CQDs.
NABoolean Join::isASpoilerJoin()
{
// control if routine joins spoil the JBB
if(isRoutineJoin() && (CmpCommon::getDefault(ROUTINE_JOINS_SPOIL_JBB) == DF_ON))
return TRUE;
// TSJ and right outer join are not allowed in the MultiJoin framework
// Note isRightJoin returns TRUE for full outer joins also, hence
// the second OR condition
if(((isTSJ() && !isRoutineJoin()) && (CmpCommon::getDefault(TSJS_SPOIL_JBB) == DF_ON))||
(!isFullOuterJoin() && isRightJoin()))
return TRUE;
// control if full outer joins are allowed in the MultiJoin framework
if(isFullOuterJoin() &&
(CmpCommon::getDefault(FULL_OUTER_JOINS_SPOIL_JBB) == DF_ON))
return TRUE;
// control if left joins are allowed in the MultiJoin framework
if(isLeftJoin() &&
(CmpCommon::getDefault(LEFT_JOINS_SPOIL_JBB) == DF_ON))
return TRUE;
// control if semi and anti-semijoins are allowed in MultiJoin framework
if((isSemiJoin() || isAntiSemiJoin()) &&
(CmpCommon::getDefault(SEMI_JOINS_SPOIL_JBB) == DF_ON))
return TRUE;
return FALSE;
}
// returns TRUE if this join can be a member of a JBB. TSJs (other
// than routine joins), right outer joins and full outer joins cannot.
NABoolean Join::canBePartOfJBB()
{
  if (isRightJoin() || isFullOuterJoin())
    return FALSE;
  if (isTSJ() && NOT isRoutineJoin())
    return FALSE;
  return TRUE;
}
// Default multi-join conversion: convert the children in place (each
// may be replaced by a MultiJoin) and record this expression as the
// node's modified form. The node itself is returned unchanged.
RelExpr* RelExpr::convertToMultiJoinSubtree(QueryAnalysis* qa)
{
  Int32 numChildren = getArity();
  for (Int32 childIdx = 0; childIdx < numChildren; childIdx++)
    child(childIdx) = child(childIdx)->convertToMultiJoinSubtree(qa);

  NodeAnalysis* nodeAnalysis = getGroupAnalysis()->getNodeAnalysis();
  if (nodeAnalysis)
    nodeAnalysis->setModifiedExpr(this);

  return this;
}
// Join multi-join conversion: replace this join (and the join backbone
// beneath it) with a MultiJoin built from the group's local JBB view.
// Joins outside a JBB, or without a local JBB view, fall back to the
// generic RelExpr conversion.
RelExpr* Join::convertToMultiJoinSubtree(QueryAnalysis* qa)
{
  if (NOT canBePartOfJBB())
    return RelExpr::convertToMultiJoinSubtree(qa);

  if (NOT getGroupAnalysis()->getLocalJBBView())
    return RelExpr::convertToMultiJoinSubtree(qa);

  CollHeap* outHeap = qa->outHeap();
  MultiJoin* multiJoin = new (outHeap)
    MultiJoin(*(getGroupAnalysis()->getLocalJBBView()), outHeap);

  // The MultiJoin shares this join's group attributes.
  multiJoin->setGroupAttr(getGroupAttr());

  // Wire up the children from the analyzer's jbbcExprMap_, then
  // convert them recursively.
  multiJoin->setChildrenFromOrigExprs(qa);
  multiJoin->convertToMultiJoinSubtree(qa);
  return multiJoin;
}
// Default implementation: push the JBB input to all children, record it
// on this node's analysis (when one exists), and return this group's
// output logical properties for that input.
EstLogPropSharedPtr RelExpr::setJBBInput(EstLogPropSharedPtr & inLP)
{
  Int32 numChildren = getArity();
  for (Lng32 childIndex = 0; childIndex < numChildren; childIndex++)
    child(childIndex)->setJBBInput(inLP);

  GroupAttributes* groupAttr = getGroupAttr();
  if (groupAttr &&
      groupAttr->getGroupAnalysis() &&
      groupAttr->getGroupAnalysis()->getNodeAnalysis())
  {
    groupAttr->getGroupAnalysis()->getNodeAnalysis()->setJBBInput(inLP);
  }

  return getGroupAttr()->outputLogProp(inLP);
}
// Push the JBB input logical properties through this join.
// For a TSJ the left child's output becomes the right child's input;
// for other joins both children see the same input.
EstLogPropSharedPtr Join::setJBBInput(EstLogPropSharedPtr & inLP)
{
// By default the right child receives the same input as this join.
EstLogPropSharedPtr inputForRight = inLP;
// Process the left child first: its output may be needed below as the
// right child's input.
EstLogPropSharedPtr leftOutput =
child(0)->setJBBInput(inLP);
if(isTSJ())
{
// if this is a TSJ set the flag that indicates
// that it is a mandatory TSJ, i.e. a TSJ that
// could not be unnested
setTSJAfterSQO();
inputForRight = leftOutput;
}
child(1)->setJBBInput(inputForRight);
if (getGroupAttr() &&
getGroupAttr()->getGroupAnalysis())
{
// Record the JBB input either on this node's NodeAnalysis, or --
// when this join carries a two-JBBC local JBB view -- on the JBB itself.
if (getGroupAttr()->getGroupAnalysis()->getNodeAnalysis())
getGroupAttr()->getGroupAnalysis()->getNodeAnalysis()->setJBBInput(inLP);
else if (getGroupAttr()->getGroupAnalysis()->getLocalJBBView() &&
getGroupAttr()->getGroupAnalysis()->getLocalJBBView()->getJBBCs().entries() == 2)
getGroupAttr()->getGroupAnalysis()->getLocalJBBView()->getJBB()->setInputEstLP(inLP);
}
return getGroupAttr()->outputLogProp(inLP);
}
// Push the JBB input logical properties into this MultiJoin's JBB and
// children, then (re-)synthesize logical properties so the MultiJoin can
// be broken into a join tree in the preferred jbbc order.
EstLogPropSharedPtr MultiJoin::setJBBInput(EstLogPropSharedPtr & inLP)
{
// Ensure a non-empty input has a node set; use NULL_CA_ID as a
// placeholder when none was provided.
if((inLP != (*GLOBAL_EMPTY_INPUT_LOGPROP)) && (!inLP->getNodeSet()))
{
CANodeIdSet * inputNodeSet = new (CmpCommon::statementHeap()) CANodeIdSet();
*inputNodeSet += NULL_CA_ID;
inLP->setNodeSet(inputNodeSet);
}
inLP->setCacheableFlag(TRUE);
jbbSubset_.getJBB()->setInputEstLP(inLP);
// pass it to children
Int32 arity = getArity();
for (Lng32 i = 0; i < arity; i++)
child(i)->setJBBInput(inLP);
// first clear out the expression already synthesized
getGroupAttr()->setLogExprForSynthesis(NULL);
// Now do the synthLogProp. This is done here since stats for all the leaf
// nodes have been computed. We can now correctly break down the MultiJoin
// into a join tree join the jbbcs in the preferred order.
synthLogProp();
return QueryAnalysis::ASM()->getStatsForJBBSubset(jbbSubset_);
}
// Default implementation: expand each child subtree in place and drop
// cached logical properties, since the tree is being re-shaped.
RelExpr* RelExpr::expandMultiJoinSubtree()
{
  Int32 numChildren = getArity();
  for (Lng32 childIndex = 0; childIndex < numChildren; childIndex++)
    child(childIndex) = child(childIndex)->expandMultiJoinSubtree();

  getGroupAttr()->clearLogProperties();
  return this;
}
// Convenience accessor for this expression's GroupAnalysis.
// xxx move this to RelExpr.h and make inline
GroupAnalysis * RelExpr::getGroupAnalysis()
{
  GroupAttributes * groupAttr = getGroupAttr();
  return groupAttr->getGroupAnalysis();
}
void RelExpr::primeGroupAnalysis()
{
// Do no GroupAnalysis priming if there was no QueryAnalysis
if (!QueryAnalysis::Instance()->isAnalysisON())
return;
GroupAnalysis* groupAnalysis = getGroupAnalysis();
// The LocalJBBView is NULL unless a join or GB on top of join
groupAnalysis->setLocalJBBView(NULL);
// recursively call the children appending their subtreeTables
CANodeIdSet allSubtreeTables;
Int32 arity = getArity();
for (Lng32 i = 0; i < arity; i++)
{
allSubtreeTables +=
child(i).getPtr()->getGroupAnalysis()->getAllSubtreeTables();
}
// If this node has TableAnalysis itself, add it.
if (groupAnalysis->getNodeAnalysis() AND
groupAnalysis->getNodeAnalysis()->getTableAnalysis())
{
allSubtreeTables += groupAnalysis->getNodeAnalysis()->getId();
}
groupAnalysis->setSubtreeTables(allSubtreeTables);
return;
}
// Prime this join's GroupAnalysis: build its local JBB view by merging
// the children's parent JBB views (when the join can be part of a JBB),
// and accumulate the children's subtree tables.
void Join::primeGroupAnalysis()
{
// Do no GroupAnalysis priming if there was no QueryAnalysis
if (!QueryAnalysis::Instance()->isAnalysisON())
return;
GroupAnalysis* groupAnalysis = getGroupAnalysis();
// recursively call the children appending their subtreeTables
// and parentJBBViews
JBBSubset* localView = NULL;
if (canBePartOfJBB())
localView = new (groupAnalysis->outHeap()) JBBSubset(groupAnalysis->outHeap());
CANodeIdSet allSubtreeTables;
Int32 arity = getArity();
for (Lng32 i = 0; i < arity; i++)
{
GroupAnalysis* childAnalysis = child(i).getPtr()->getGroupAnalysis();
// use children parentViews to build this join local view
if (canBePartOfJBB())
{
const JBBSubset * childParentView =
childAnalysis->getParentJBBView();
// If any child has no parent JBB view, this join cannot form a
// local view either; NULL sticks for the remaining iterations.
if (childParentView && localView)
localView->addSubset((*childParentView));
else
localView = NULL;
}
allSubtreeTables += childAnalysis->getAllSubtreeTables();
}
if(canBePartOfJBB())
groupAnalysis->setLocalJBBView(localView);
groupAnalysis->setSubtreeTables(allSubtreeTables);
return;
}
// Prime this GroupBy's GroupAnalysis: inherit the child's subtree tables,
// and build a local JBB view only when this GB is a member of the same
// JBB that the child's parent JBB view belongs to.
void GroupByAgg::primeGroupAnalysis()
{
// Do no GroupAnalysis priming if there was no QueryAnalysis
if (!QueryAnalysis::Instance()->isAnalysisON())
return;
GroupAnalysis* groupAnalysis = getGroupAnalysis();
GroupAnalysis* childAnalysis = child(0).getPtr()->getGroupAnalysis();
// have same subtree tables as child
groupAnalysis->setSubtreeTables(childAnalysis->getAllSubtreeTables());
// The LocalJBBView is NULL unless
// 1. This is a GB that is a GBMember of some JBB i.e it has a GBAnalysis.
// 2. The child has a parentJBBView i.e. its either a JBBC or a JBBSubset.
// 3. This GB is the GBMember of the same JBB that the parentJBBView belongs to.
// This assures that any generated localJBBView would not mingle GB and JBBCs from
// different JBBs. For such evil combination, we set localJBBView to NULL.
if (getGBAnalysis() AND
childAnalysis->getParentJBBView() AND
childAnalysis->getParentJBBView()->getJBB()->getGBAnalysis() == getGBAnalysis())
{
JBBSubset* localView =
new (groupAnalysis->outHeap()) JBBSubset(groupAnalysis->outHeap());
localView->addSubset(*(childAnalysis->getParentJBBView()));
localView->setGB(getGBAnalysis()->getGroupingNodeId());
groupAnalysis->setLocalJBBView(localView);
}
else
{
groupAnalysis->setLocalJBBView(NULL);
}
// Warning: I should attach GBAnalysis to groupBy expr rather than
// NodeAnalysis.
// Warning: if a GB is pushed through multiple JBBs then we could
// end up with a JBBSubset with GB and JBBCs from different JBBs.
// This can't happen in R2.0 but could happen after that. A solution
// for this exists for after R2.0.
return;
}
// Prime this Filter's GroupAnalysis: a filter is transparent, so it
// inherits the child's subtree tables, a deep copy of the child's local
// JBB view, and the child's node analysis.
void Filter::primeGroupAnalysis()
{
// Do no GroupAnalysis priming if there was no QueryAnalysis
if (!QueryAnalysis::Instance()->isAnalysisON())
return;
// a filter node is transparent with respect to group analysis (so far)
GroupAnalysis* groupAnalysis = getGroupAnalysis();
GroupAnalysis* childAnalysis = child(0).getPtr()->getGroupAnalysis();
// have same subtree tables as child
groupAnalysis->setSubtreeTables(childAnalysis->getAllSubtreeTables());
// have same localJBBView as child. Make deep copy (if not NULL of course)
if (childAnalysis->getLocalJBBView())
{
JBBSubset* localView =
new (groupAnalysis->outHeap()) JBBSubset(groupAnalysis->outHeap());
localView->copySubsetMembers(*(childAnalysis->getLocalJBBView()));
groupAnalysis->setLocalJBBView(localView);
}
else
{
groupAnalysis->setLocalJBBView(NULL);
}
// have same node analysis as child
// Warning: Materialize could have a TableAnalysis this way! do
// we want that? xxx
groupAnalysis->setNodeAnalysis(childAnalysis->getNodeAnalysis());
return;
}
// -----------------------------------------------------------------------
// Methods for class ValueIdSet
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// ValueIdSet::accumulateReferencingValues()
//
// This method accumulates those expressions that are members of the
// referencingSet and are referencing any value in ReferencedSet.
// -----------------------------------------------------------------------
void ValueIdSet::accumulateReferencingExpressions(const ValueIdSet & referencingSet,
const ValueIdSet & referencedSet,
NABoolean doNotDigInsideVegRefs,
NABoolean doNotDigInsideInstNulls)
{
for (ValueId referencingExpr = referencingSet.init();
referencingSet.next(referencingExpr);
referencingSet.advance(referencingExpr))
{
for (ValueId referencedExpr = referencedSet.init();
referencedSet.next(referencedExpr);
referencedSet.advance(referencedExpr))
{
if (referencingExpr.getItemExpr()
->referencesTheGivenValue(
referencedExpr,
doNotDigInsideVegRefs,
doNotDigInsideInstNulls))
{
addElement(referencingExpr);
break;
}
} // referencedExpr loop
} // referencingExpr loop
} // ValueIdSet::accumulateReferencingExpressions()
// -----------------------------------------------------------------------
// ValueIdSet::removeNonReferencingExpressions()
//
// This method remove those expressions that are members of the
// referencingSet and not referencing any value in ReferencedSet.
// -----------------------------------------------------------------------
// Remove from this set every expression that references no value in
// otherSet. The two flags control whether the reference search digs
// inside VEG references / InstantiateNull nodes.
// NOTE(review): elements are removed (subtractElement) while iterating
// with init/next/advance -- presumably the ValueIdSet iteration idiom
// tolerates in-place removal; confirm before restructuring.
void ValueIdSet::removeNonReferencingExpressions(const ValueIdSet & otherSet,
NABoolean doNotDigInsideVegRefs,
NABoolean doNotDigInsideInstNulls)
{
for (ValueId myExpr = init(); next(myExpr); advance(myExpr))
{
NABoolean isReferencing = FALSE;
// Keep myExpr as soon as it references any value in otherSet.
for (ValueId otherExpr = otherSet.init();
otherSet.next(otherExpr);
otherSet.advance(otherExpr))
{
if (myExpr.getItemExpr()
->referencesTheGivenValue(
otherExpr,
doNotDigInsideVegRefs,
doNotDigInsideInstNulls))
{
isReferencing = TRUE;
break;
}
} // otherExpr loop
if (NOT isReferencing)
subtractElement(myExpr); // delete expression from set
} // myExpr loop
}
// -----------------------------------------------------------------------
// ValueIdSet::findAllReferencedBaseCols()
//
// This method find all base columns referenced directly or indirectly
// via this ValueIdSet. This includes digging into VEGs and recursively
// walking ItemExpr trees until the leaf nodes.
// -----------------------------------------------------------------------
// Collect into result every base column referenced, directly or
// indirectly, by any member of this set. Both findAll flags are TRUE,
// so the search digs into VEGs and walks ItemExpr trees to the leaves.
void ValueIdSet::findAllReferencedBaseCols(ValueIdSet & result) const
{
  for (ValueId member = init(); next(member); advance(member))
    member.getItemExpr()->findAll(ITM_BASECOLUMN, result, TRUE, TRUE);
}
// -----------------------------------------------------------------------
// ValueIdSet::findAllReferencedIndexCols()
//
// This method find all index columns referenced directly or indirectly
// via this ValueIdSet. This includes digging into VEGs and recursively
// walking ItemExpr trees until the leaf nodes.
// -----------------------------------------------------------------------
// Collect into result every index column referenced, directly or
// indirectly, by any member of this set. Both findAll flags are TRUE,
// so the search digs into VEGs and walks ItemExpr trees to the leaves.
void ValueIdSet::findAllReferencedIndexCols(ValueIdSet & result) const
{
  for (ValueId member = init(); next(member); advance(member))
    member.getItemExpr()->findAll(ITM_INDEXCOLUMN, result, TRUE, TRUE);
}
// -----------------------------------------------------------------------
// ValueIdSet::findAllEqualityCols()
//
// This method finds all equality columns referenced directly or indirectly
// via this ValueIdSet.
// -----------------------------------------------------------------------
// Collect into result every equality column referenced, directly or
// indirectly, by any member of this set.
void ValueIdSet::findAllEqualityCols(ValueIdSet & result) const
{
  for (ValueId member = init(); next(member); advance(member))
    member.getItemExpr()->findEqualityCols(result);
}
// -----------------------------------------------------------------------
// Perform columnAnalysis on this set
//
// The purpose of this method is to compute all the connection-by-reference
// and connection-by-VEG between columns. Connection-by-reference means
// simply that the two columns are referenced in the same predicate. The
// connection-by-VEG means that the two columns are in the same VEG.
//
// Another version of this method (which is commented) include also connection
// via range predicates and equal (non-VEG) predicates.
// -----------------------------------------------------------------------
// Perform column analysis on this predicate set: record, for each
// predicate, which base columns it references (connection-by-reference),
// which columns are VEG-connected to it, and -- for plain equality
// predicates outside semi/left joins -- which JBBCs can produce each
// side of the equality.
void ValueIdSet::columnAnalysis(QueryAnalysis* qa, NABoolean predOnSemiOrLeftJoin) const
{
QueryAnalysis* queryAnalysis = QueryAnalysis::Instance();
ValueIdSet cols;
// Loop over all expressions in the set
for (ValueId x= init(); next(x); advance(x) )
{
cols.clear();
// Find all base columns this predicate references (digs into VEGs
// and walks the ItemExpr tree).
x.getItemExpr()->findAll(ITM_BASECOLUMN, cols, TRUE, TRUE);
// This method will create PredAnalysis for this value if none has
// been created so far.
QueryAnalysis::Instance()->findPredAnalysis(x);
// Record the predicate<->column connection in both directions.
for (ValueId c= cols.init(); cols.next(c); cols.advance(c) )
{
if (c.colAnalysis())
{
c.colAnalysis()->addToReferencingPreds(x);
x.predAnalysis()->addToReferencedCols(c);
}
}
// look for VEGPredicates and add them to their column's
// vegPreds_ field
ItemExpr* itemExpr = x.getItemExpr();
OperatorTypeEnum op = itemExpr->getOperatorType();
if (op == ITM_VEG_PREDICATE)
{
// I do not think we need to check for vegRef too. only vegPreds
VEG* veg = ((VEGPredicate*) itemExpr)->getVEG();
const ValueIdSet & values = veg->getAllValues();
// Connect every base column in the VEG to this VEG predicate.
for (ValueId v = values.init(); values.next(v); values.advance(v))
{
if (v.getItemExpr()->getOperatorType() == ITM_BASECOLUMN)
{
if (v.colAnalysis())
{
v.colAnalysis()->addToVegPreds(x);
x.predAnalysis()->addToVegConnectedCols(v);
}
}
}
}
else if((!predOnSemiOrLeftJoin) && (op == ITM_EQUAL))
{
ItemExpr * rhs = itemExpr->child(1);
ItemExpr * lhs = itemExpr->child(0);
ValueId rVid = rhs->getValueId();
ValueId lVid = lhs->getValueId();
ValueIdSet rightCols;
ValueIdSet leftCols;
NABoolean rightEvaluatesToConstant =
rhs->doesExprEvaluateToConstant(FALSE, TRUE);
NABoolean leftEvaluatesToConstant =
lhs->doesExprEvaluateToConstant(FALSE, TRUE);
/*
ValueIdSet rExprBaseCols;
if(rhs)
rhs->findAll(ITM_BASECOLUMN, rExprBaseCols, TRUE, TRUE);
ValueIdSet lExprBaseCols;
if(lhs)
lhs->findAll(ITM_BASECOLUMN, lExprBaseCols, TRUE, TRUE);
*/
// Gather the base columns on the right side: either the column
// itself, or all base-column members of a VEG reference.
if (rhs->getOperatorType() == ITM_BASECOLUMN)
{
rightCols += rhs->getValueId();
}
else if( rhs->getOperatorType() == ITM_VEG_REFERENCE )
{
ValueIdSet rVegMembers = ((VEGReference*)rhs)->getVEG()->getAllValues();
for (ValueId rVegMember = rVegMembers.init();
rVegMembers.next(rVegMember);
rVegMembers.advance(rVegMember))
{
ItemExpr * rExpr = rVegMember.getItemExpr();
if(rExpr && (rExpr->getOperatorType() == ITM_BASECOLUMN))
{
rightCols += rVegMember;
}
}
}
// Gather the base columns on the left side the same way.
if (lhs->getOperatorType() == ITM_BASECOLUMN)
{
leftCols += lhs->getValueId();
}
else if( lhs->getOperatorType() == ITM_VEG_REFERENCE )
{
ValueIdSet lVegMembers = ((VEGReference*)lhs)->getVEG()->getAllValues();
for (ValueId lVegMember = lVegMembers.init();
lVegMembers.next(lVegMember);
lVegMembers.advance(lVegMember))
{
ItemExpr * lExpr = lVegMember.getItemExpr();
if(lExpr && (lExpr->getOperatorType() == ITM_BASECOLUMN))
{
leftCols += lVegMember;
}
}
}
// For this predicate to be useful for positioning the LHS, the RHS
// should be independent of any other columns of the same table. i.e.
// 1) RHS should evaluate to constant, OR
// 2) RHS should be produced by other JBBCs/tables.
// NOTE: the else below binds to the inner if (dangling else) -- it
// runs only when the left side is NOT a constant.
if(!leftEvaluatesToConstant) // i.e. the left is not a constant
if(leftCols.entries() &&
rightEvaluatesToConstant)
{
for (ValueId lCol = leftCols.init();
leftCols.next(lCol);
leftCols.advance(lCol))
{
if (lCol.colAnalysis())
lCol.colAnalysis()->addToEqualityCoveringPreds(x);
//x.predAnalysis()->addToEqualityConnectedCols(lCol);
}
}
else{
// RHS is not a constant: record which joined JBBCs can produce
// the RHS for each left-side column.
for (ValueId lCol = leftCols.init();
leftCols.next(lCol);
leftCols.advance(lCol))
{
JBBC * lTableJBBC = NULL;
if (lCol.colAnalysis())
{
CANodeId lTable = lCol.colAnalysis()->
getTableAnalysis()->
getNodeAnalysis()->
getId();
lTableJBBC = lCol.colAnalysis()->
getTableAnalysis()->
getNodeAnalysis()->
getJBBC();
}
CANodeIdSet lJoinedJBBCs;
if(lTableJBBC)
lJoinedJBBCs = lTableJBBC->getJoinedJBBCs();
for(CANodeId joinedJBBC = lJoinedJBBCs.init();
lJoinedJBBCs.next(joinedJBBC);
lJoinedJBBCs.advance(joinedJBBC))
{
CANodeIdSet jbbcSubset;
jbbcSubset += joinedJBBC;
ValueIdSet joinPreds = joinedJBBC.
getNodeAnalysis()->
getJBBC()->
getJoinPreds();
if(joinPreds.contains(x) &&
jbbcSubset.jbbcsToJBBSubset()->coversExpr(rVid))
{
queryAnalysis->addProducingJBBC(rVid, joinedJBBC);
lCol.colAnalysis()->addEqualityRelation(lVid, x, rVid, joinedJBBC);
//x.predAnalysis()->addToEqualityConnectedCols(lCol);
}
}
}
}
// For this predicate to be useful for positioning the RHS, the LHS
// should be independent of any other columns of the same table. i.e.
// 1) LHS should evaluate to constant, OR
// 2) LHS should be produced by other JBBCs/tables.
// NOTE: same dangling-else structure as above, mirrored for the RHS.
if(!rightEvaluatesToConstant)
if(rightCols.entries() &&
leftEvaluatesToConstant)
{
for (ValueId rCol = rightCols.init();
rightCols.next(rCol);
rightCols.advance(rCol))
{
if (rCol.colAnalysis())
rCol.colAnalysis()->addToEqualityCoveringPreds(x);
//x.predAnalysis()->addToEqualityConnectedCols(rCol);
}
}
else{
// LHS is not a constant: record which joined JBBCs can produce
// the LHS for each right-side column.
for (ValueId rCol = rightCols.init();
rightCols.next(rCol);
rightCols.advance(rCol))
{
JBBC * rTableJBBC = NULL;
if (rCol.colAnalysis())
{
CANodeId rTable = rCol.colAnalysis()->
getTableAnalysis()->
getNodeAnalysis()->
getId();
rTableJBBC = rCol.colAnalysis()->
getTableAnalysis()->
getNodeAnalysis()->
getJBBC();
}
CANodeIdSet rJoinedJBBCs;
if(rTableJBBC)
rJoinedJBBCs = rTableJBBC->getJoinedJBBCs();
for(CANodeId joinedJBBC = rJoinedJBBCs.init();
rJoinedJBBCs.next(joinedJBBC);
rJoinedJBBCs.advance(joinedJBBC))
{
CANodeIdSet jbbcSubset;
jbbcSubset += joinedJBBC;
ValueIdSet joinPreds = joinedJBBC.
getNodeAnalysis()->
getJBBC()->
getJoinPreds();
if(joinPreds.contains(x) &&
jbbcSubset.jbbcsToJBBSubset()->coversExpr(lVid))
{
queryAnalysis->addProducingJBBC(lVid, joinedJBBC);
rCol.colAnalysis()->addEqualityRelation(rVid, x, lVid, joinedJBBC);
//x.predAnalysis()->addToEqualityConnectedCols(lCol);
}
}
}
}
}
/* Inspectors ignore this commented block
else if (op == ITM_LESS OR
op == ITM_LESS_EQ OR
op == ITM_GREATER OR
op == ITM_GREATER_EQ)
{
ItemExpr * lhs = itemExpr->child(0);
ItemExpr * rhs = itemExpr->child(1);
if (lhs->getOperatorType() == ITM_BASECOLUMN)
{
// For this query to be useful for positionining, the RHS should
// be independent of any other columns of the same table. i.e.
// 1) RHS should evaluate to constan, OR
// 2) RHS should have no other column of the same table.
v.colAnalysis()->addToVegPreds(x);
x.predAnalysis()->addToVegConnectedCols(v);
}
ValueId column = lhs->getValueId();
const ValueIdSet & values = veg->getAllValues();
for (ValueId v = values.init(); values.next(v); values.advance(v))
{
if (v.getItemExpr()->getOperatorType() == ITM_BASECOLUMN)
{
v.colAnalysis()->addToVegPreds(x);
x.predAnalysis()->addToVegConnectedCols(v);
}
}
}
else if (op == ITM_EQUAL)
{
ItemExpr * lhs = itemExpr->child(0);
ItemExpr * rhs = itemExpr->child(1);
if (lhs->getOperatorType() == ITM_BASECOLUMN)
{
// For this query to be useful for positionining, the RHS should
// be independent of any other columns of the same table. i.e.
// 1) RHS should evaluate to constan, OR
// 2) RHS should have no other column of the same table.
v.colAnalysis()->addToVegPreds(x);
x.predAnalysis()->addToVegConnectedCols(v);
}
ValueId column = lhs->getValueId();
const ValueIdSet & values = veg->getAllValues();
for (ValueId v = values.init(); values.next(v); values.advance(v))
{
if (v.getItemExpr()->getOperatorType() == ITM_BASECOLUMN)
{
v.colAnalysis()->addToVegPreds(x);
x.predAnalysis()->addToVegConnectedCols(v);
}
}
}
Inpspectors ignore end here
*/
}
return;
}
// -----------------------------------------------------------------------
// Methods for class GroupAnalysis
// -----------------------------------------------------------------------
// copy constructor.
// Copy constructor. Shallow-copies everything except the local JBB
// view, which is deep-copied on the provided heap.
GroupAnalysis::GroupAnalysis(const GroupAnalysis & other,
                             CollHeap *outHeap):
  groupAttr_(other.groupAttr_),
  allSubtreeTables_(other.allSubtreeTables_),
  caNode_(other.caNode_),
  heap_(outHeap)
{
  localJBBView_ = NULL;
  if (other.localJBBView_)
  {
    // Deep copy so this object owns an independent JBBSubset.
    localJBBView_ = new (heap_) JBBSubset(heap_);
    localJBBView_->copySubsetMembers(*(other.localJBBView_));
  }
}
// Destructor: releases the deep-copied local JBB view (deleting NULL is
// a no-op, so no guard is needed).
GroupAnalysis::~GroupAnalysis()
{
delete localJBBView_;
}
// clear analysis results in GroupAnalysis.
// note: this should clear only analysis results and
// should not clear groupAttr_ or heap_
void GroupAnalysis::clear()
{
// Pointers are dropped without delete -- presumably the pointed-to
// objects live on a statement/analysis heap reclaimed wholesale;
// TODO(review): confirm this does not leak on a global heap.
localJBBView_ = NULL;
caNode_ = NULL;
allSubtreeTables_.clear();
}
// Render a human-readable description of this GroupAnalysis: its local
// JBB view, subtree tables, node analysis, JBB input node set, and the
// group's potential.
const NAString GroupAnalysis::getText() const
{
NAString result("GroupAnalysis:\n");
if (localJBBView_)
{
result += "LocalJBBView :" + localJBBView_->getText() + "\n";
}
result += "SubtreeTables : " + allSubtreeTables_.getText() + "\n";
if (caNode_)
{
result += caNode_->getText() + "\n";
}
if (localJBBView_ &&
localJBBView_->getJBB() &&
localJBBView_->getJBB()->getInputEstLP() &&
localJBBView_->getJBB()->getInputEstLP()->getNodeSet())
{
result += "JBBInput : " +
localJBBView_->getJBB()->getInputEstLP()->getNodeSet()->getText() + "\n";
}
// Use the cached value instead of re-calling getPotential() for each
// branch (the original re-fetched it redundantly).
Int32 potential = groupAttr_->getPotential();
if(potential < 0)
result += "GroupPotential : -" + istring(-potential) + "\n";
else
result += "GroupPotential : " + istring(potential) + "\n";
return result;
}
// Debugging convenience: print this GroupAnalysis with default
// arguments (stdout per print()'s defaults -- declared elsewhere).
void GroupAnalysis::display() const
{
print();
}
// Print this GroupAnalysis's text to the given stream.
// Note: prefix/suffix are accepted for interface compatibility but are
// not emitted (matching the original behavior).
void GroupAnalysis::print (FILE *f,
const char * prefix,
const char * suffix) const
{
// Use an explicit "%s" format: passing getText() directly as the
// fprintf format string is undefined behavior if the text ever
// contains a '%' conversion specifier.
fprintf (f, "%s", getText().data());
}
// Comparison between two groups
// should move this to GroupAttributes ==
// Two groups are considered equivalent when they have the same
// characteristic inputs and the same parent JBB view.
NABoolean GroupAnalysis::operator == (const GroupAnalysis & other)
{
if (this == &other)
return TRUE;
// two groups are the same if they have the same jbbParentView
//and same char inputs.
if (getGroupAttr()->getCharacteristicInputs() !=
other.getGroupAttr()->getCharacteristicInputs())
return FALSE;
const JBBSubset * parentJBBView = getParentJBBView();
// NOTE(review): this compares JBBSubset POINTERS, and getParentJBBView()
// allocates a fresh JBBSubset for JBBC nodes -- such nodes would never
// compare equal here. Presumably only the shared-localJBBView_ case is
// intended; confirm before relying on JBBC equality.
if (parentJBBView == NULL OR
parentJBBView != other.getParentJBBView())
return FALSE;
// the two groups are equivalent
return TRUE;
}
// The parent JBB view of the JBBSubset. If this group is a
// JBBC then a JBBSubset of this JBBC alone is returned. Otherwise
// the localJBBView_ is returned
// The parent JBB view of the JBBSubset. If this group is a
// JBBC then a JBBSubset of this JBBC alone is returned. Otherwise
// the localJBBView_ is returned.
// NOTE(review): the JBBC branch allocates a new JBBSubset on heap_ on
// every call and the caller does not own it -- presumably acceptable on
// a statement heap; confirm if heap_ can be longer-lived.
const JBBSubset * GroupAnalysis::getParentJBBView() const
{
if (getNodeAnalysis() &&
getNodeAnalysis()->getJBBC())
{
JBBSubset* parentView = new (heap_) JBBSubset();
parentView->addJBBC(getNodeAnalysis()->getId());
return parentView;
}
else
return localJBBView_;
}
// Reconcile this GroupAnalysis with another one, used when two groups
// merge: merge their NodeAnalysis objects and their local JBB views.
void GroupAnalysis::reconcile(GroupAnalysis * other)
{
// reconcile NodeAnalysis
NodeAnalysis* othersNodeAnalysis = other->getNodeAnalysis();
if (!getNodeAnalysis())
{
// I have none: simply adopt the other's.
setNodeAnalysis(othersNodeAnalysis);
}
else
{
// I have a NodeAnalysis
// Reconcile NodeAnalysis
// In order to avoid inconsistencies between the ids used in
// the JBBSubsets and the id of the JBBC resulting from the
// reconcilation, we use the NodeAnalysis with the JBBC as
// a base for the reconcilation.
// xxx
// Note that we may still have a similar problem with
// allSubTreeTables (point to obsolete TableAnalysis ids),
// we will need to fix this when allSubTreeTables starting
// getting used (post R2.0).
if(othersNodeAnalysis &&
othersNodeAnalysis->getJBBC() &&
!getNodeAnalysis()->getJBBC())
{
// reconcile othersNodeAnalysis with mine
othersNodeAnalysis->reconcile(getNodeAnalysis());
caNode_ = othersNodeAnalysis;
}
else{
// reconcile NodeAnalysis with other
getNodeAnalysis()->reconcile(othersNodeAnalysis);
}
}
// reconcile localJBBView
JBBSubset* othersLocalView = (JBBSubset*) other->getLocalJBBView();
if (!getLocalJBBView() && othersLocalView)
{
setLocalJBBView(othersLocalView);
}
else if (getLocalJBBView() && !othersLocalView)
{
// keep yours in this case
}
else if (getLocalJBBView() && othersLocalView)
{
// Both sides have a local view.
if (getLocalJBBView()->getJBB() == othersLocalView->getJBB())
{
// Same JBB: the JBBC sets must match.
// The assertion below may become invalid in the future if we implement
// features like elimination of referential integrity joins.
CMPASSERT(getLocalJBBView()->getJBBCs() == othersLocalView->getJBBCs());
// GroupBy may be eliminated legally via the GB elimination rule.
// If any of the two groupAnalysis has no GB, make the result without GB.
// First lets check for the following
// if both have GBs they should be the same
CMPASSERT((othersLocalView->getGB()==NULL_CA_ID) ||
(getLocalJBBView()->getGB()==NULL_CA_ID) ||
(getLocalJBBView()->getGB()==othersLocalView->getGB()));
if (othersLocalView->getGB() == NULL_CA_ID)
{
((JBBSubset*)(getLocalJBBView()))->setGB(NULL_CA_ID);
}
}
else
{
// In some rare cases we may get two JBBSubsets from different JBBs
// merging. This requires an expression (group) that is a JBBC for
// a parent JBB (parentJBBView) and a mainSubset for a child JBB
// (localJBBView). Then an elimination for a GB that is a parent expr
// for the aforementioned expr could result in a merge of the groups
// of the two expressions. The top expr (GB) would have a localJBBView of
// {JBBC;GB}, while the lower expr would have a localJBBView equal to
// the mainSet of the lower JBB
// In practice this should be the child expr groupAnalysis and other should
// be that of the GB. But its safer not to assume that. So we try both ways.
if (othersLocalView->getJBBCs().entries() == 1)
{
// other has 1 JBBC and this is mainJBBSubset
CMPASSERT(*(getLocalJBBView()) ==
getLocalJBBView()->getJBB()->getMainJBBSubset());
// We keep localJBBView as is
}
else
{
// other is a mainJBBSubset and this has 1 JBBC
CMPASSERT(getLocalJBBView()->getJBBCs().entries() == 1 AND
*(othersLocalView) ==
othersLocalView->getJBB()->getMainJBBSubset());
setLocalJBBView(othersLocalView); // take the values of the child group
}
}
}
// For now allSubtreeTables should be the same
// this may change later if we enable table re-use and table elimination
CCMPASSERT(getAllSubtreeTables() ==
other->getAllSubtreeTables());
}
// list of all promising indexes in this subtree
// this will replace availableBtreeIndexes in GroupAttr
//const LIST(AccessPathAnalysis*) &
// GroupAnalysis::getAllSubtreePromisingAccessPaths()
//{
// xxx();
//}
// -----------------------------------------------------------------------
// Methods for class NodeAnalysis
// -----------------------------------------------------------------------
// Return this node's estimated logical properties, fetching them lazily
// from the applied stat manager on first use and caching the result.
EstLogPropSharedPtr NodeAnalysis::getStats()
{
  if (stats_ == NULL)
    stats_ = QueryAnalysis::ASM()->getStatsForCANodeId(id_);

  return stats_;
}
// Return this node's estimated result cardinality.
// Delegates to getStats(), which already performs the lazy fetch-and-
// cache of stats_, instead of duplicating that logic here.
CostScalar NodeAnalysis::getCardinality()
{
  return getStats()->getResultCardinality();
}
// reconcile this NodeAnalysis object with the NodeAnalysis
// object passed in
// This is used in case of Group Merges
// reconcile this NodeAnalysis object with the NodeAnalysis
// object passed in.
// This is used in case of Group Merges: adopt the other's JBBC /
// TableAnalysis when this node lacks them, and assert consistency when
// both nodes have them.
void NodeAnalysis::reconcile(NodeAnalysis * other)
{
if (!other)
{
// the other guy is NULL
// nothing to reconcile
return;
}
// Same node: nothing to do.
if (getId() == other->getId())
return;
if(!getJBBC())
{
setJBBC(other->getJBBC());
}
else if (!other->getJBBC())
{
// keep yours
}
else
{
// both should be same JBBC
CMPASSERT(getJBBC() == other->getJBBC());
}
if(!getTableAnalysis())
{
setTableAnalysis(other->getTableAnalysis());
}
else if (!other->getTableAnalysis())
{
// keep yours
}
else
{
// both should have the same table analysis
CCMPASSERT(getTableAnalysis() == other->getTableAnalysis());
}
// NodeAnalysis with GBAnalysis are not reconcilable
CMPASSERT((!getGBAnalysis()) && (!other->getGBAnalysis()));
}
// This method computes some basic stats about the node, and caches them
void NodeAnalysis::computeStats()
{
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
stats_ = appStatMan->getStatsForCANodeId(id_);
recordSize_ = getOriginalExpr()->getGroupAttr()->getRecordLength();
// if table compute table specific stats
if (getTableAnalysis())
getTableAnalysis()->computeTableStats();
return;
}
// Render a human-readable description of this node, including its JBBC
// and base-table details when present.
const NAString NodeAnalysis::getText() const
{
  NAString text("NodeAnalysis # ");
  text += istring(id_) + ":\n";

  if (jbbc_)
  {
    text += "This node is a JBBC\n";
    text += jbbc_->getText();
  }

  if (table_)
  {
    text += "This node is a Base Table\n";
    text += table_->getText();
  }

  text += "\n";
  return text;
}
// Debugging convenience: print this NodeAnalysis with default
// arguments (per print()'s declared defaults).
void NodeAnalysis::display() const
{
print();
}
// Print this NodeAnalysis's text to the given stream.
// Note: prefix/suffix are accepted for interface compatibility but are
// not emitted (matching the original behavior).
void NodeAnalysis::print (FILE *f,
const char * prefix,
const char * suffix) const
{
// Use an explicit "%s" format: passing getText() directly as the
// fprintf format string is undefined behavior if the text ever
// contains a '%' conversion specifier.
fprintf (f, "%s", getText().data());
}
// Accessor: TRUE if this node was classified as an extra-hub node.
NABoolean NodeAnalysis::isExtraHub()
{
//// Debugging stuff TBD!!!
//NAString name(table_->getTableDesc()->getNATable()->getTableName().getQualifiedNameAsString());
//if (!name.compareTo("CAT.TPCD.REGION"))
//return TRUE;
return isExtraHub_;
}
// -----------------------------------------------------------------------
// Methods for class TableAnalysis
// -----------------------------------------------------------------------
// List access paths that are promising for predicate lookup
// List access paths that are promising for predicate lookup.
// Returns the full access-path list; no filtering is implemented yet.
const LIST(AccessPathAnalysis*) &
TableAnalysis::promisingAccessPathsForLookup()
{
// xxx Not complete. Return all indexes for now.
return accessPaths_;
}
// This is the helper function for the interfaces below:
// - indexOnly specifies the list of access paths to search.
// - cc specifies the coverage (index or part key).
// - vidSet is the provided set to check for coverage.
// - exactMatch is only relevant for partitioning key coverage and
// specifies if exact match is needed with the provided set.
// Helper for the coverage interfaces below:
// - indexOnly specifies the list of access paths to search.
// - cc specifies the coverage criterion (index prefix or part key).
// - vidSet is the provided set to check for coverage.
// - exactMatch is only relevant for partitioning-key coverage and
//   specifies if an exact match is needed with the provided set.
LIST(AccessPathAnalysis*)
TableAnalysis::getCoveringAccessPaths(NABoolean indexOnly,
coverageCriteria cc,
const ValueIdSet& vidSet,
NABoolean exactMatch)
{
// Bind a reference to the list to search instead of copying it into a
// local (the original copied the whole LIST).
const LIST(AccessPathAnalysis*) & accessPaths =
indexOnly ? indexOnlyAccessPaths_ : accessPaths_;
LIST(AccessPathAnalysis*) retAccessPaths(STMTHEAP);
// Iterate through access paths to locate required access paths.
for (CollIndex i=0; i<accessPaths.entries(); i++)
{
// The criteria are mutually exclusive, so use else-if.
if (cc == INDEX)
{
// Index coverage: the provided set covers an index prefix.
if (accessPaths[i]->numIndexPrefixCovered(vidSet, exactMatch))
retAccessPaths.insert(accessPaths[i]);
}
else if (cc == PART_KEY)
{
// Partitioning-key coverage.
if (accessPaths[i]->isPartKeyCovered(vidSet, exactMatch))
retAccessPaths.insert(accessPaths[i]);
}
}
// Return the list of covering access paths.
return retAccessPaths;
}
// List access paths where given set covers prefix of an index.
//
// If indexOnly flag is set, only return index-only access paths
// covering prefix of an index. Otherwise, return all access
// paths covering prefix of an index.
//
// If exactMatch flag is set, only return access paths where all
// the members of the provided set are covered i.e the provided set
// is a subset of index columns. Otherwise, return access paths
// where the provided set covers prefix of an index.
// List access paths where the given set covers a prefix of an index.
// Thin wrapper over getCoveringAccessPaths with the INDEX criterion;
// see the comment block above for the indexOnly/exactMatch semantics.
LIST(AccessPathAnalysis*)
TableAnalysis::accessPathsCoveringIndex(const ValueIdSet& vidSet,
NABoolean indexOnly,
NABoolean exactMatch)
{
return getCoveringAccessPaths(indexOnly, INDEX, vidSet, exactMatch);
}
// List access paths where given set cover entire partitioning key.
//
// If indexOnly flag is set, only return index-only access paths
// covering entire partitioning key. Otherwise, return access paths
// covering entire partitioning key.
//
// If exactMatch flag is set, only return access paths where the
// provided set is exact match with the partitioning key. Otherwise,
// return access paths where the partitioning key columns are a
// prefix-subset of the provided set.
// List access paths where the given set covers the partitioning key.
// Thin wrapper over getCoveringAccessPaths with the PART_KEY criterion;
// see the comment block above for the indexOnly/exactMatch semantics.
LIST(AccessPathAnalysis*)
TableAnalysis::accessPathsCoveringPartKey(const ValueIdSet& vidSet,
NABoolean indexOnly,
NABoolean exactMatch)
{
return getCoveringAccessPaths(indexOnly, PART_KEY, vidSet, exactMatch);
}
// Render a human-readable description of this TableAnalysis: table
// name, predicate/column sets, per-column analysis, predecessor JBBCs,
// and all access paths.
const NAString TableAnalysis::getText() const
{
NAString result(">> TableAnalysis # ");
result += istring(caNodeAnalysis_->getId()) + ":\n";
result += tableDesc_->getNATable()->getTableName().getQualifiedNameAsString();
result += "\n";
result += "localPreds: " + valueIdSetGetText(localPreds_) + "\n";
#ifdef MVQR
result += "referencingPreds: " + valueIdSetGetText(referencingPreds_) + "\n";
result += "vegPreds: " + valueIdSetGetText(vegPreds_) + "\n";
#endif
result += "constCols: " + valueIdSetGetText(constCols_) + "\n";
result += "usedCols: " + valueIdSetGetText(usedCols_) + "\n";
// Append the column analysis of every used column.
for (ValueId x = usedCols_.init(); usedCols_.next(x); usedCols_.advance(x))
{
result += ">>>> " + x.colAnalysis()->getText();
}
// When this table is a JBBC, list its predecessor JBBC ids.
if (caNodeAnalysis_ && caNodeAnalysis_->getJBBC())
{
const CANodeIdSet &predecessors = caNodeAnalysis_->getJBBC()->getPredecessorJBBCs();
if (predecessors.entries() > 0)
{
char buf[12];
NABoolean first = TRUE;
result += "predecessor JBBCs: {";
for (CANodeId x=predecessors.init(); predecessors.next(x); predecessors.advance(x))
{
if (!first)
result += ", ";
first = FALSE;
snprintf(buf, sizeof(buf), "%d", x.toUInt32());
result += buf;
}
result += "}";
}
}
result += "\nIndexOnly Access Paths: \n";
for (CollIndex i=0; i<indexOnlyAccessPaths_.entries(); i++)
{
result += indexOnlyAccessPaths_[i]->getText() + "\n";
}
result += "\nAll Access Paths: \n";
for (CollIndex i=0; i<accessPaths_.entries(); i++)
{
result += accessPaths_[i]->getText() + "\n";
}
return result;
}
// Debug helper: write getText() to the given stream.
// prefix/suffix are part of the common print() signature but are
// intentionally unused here (as in the other print() methods).
void TableAnalysis::print (FILE *f,
                           const char * prefix,
                           const char * suffix) const
{
  // Use an explicit "%s" format: passing getText() directly as the
  // format string is undefined behavior if a table or predicate name
  // ever contains a '%' conversion specifier.
  fprintf (f, "%s", getText().data());
}
void TableAnalysis::initAccessPaths()
{
const LIST(IndexDesc *) &indexes = tableDesc_->getIndexes();
for (CollIndex i = 0; i < indexes.entries(); i++)
{
AccessPathAnalysis * accessPath =
new (heap_) AccessPathAnalysis(indexes[i], this, heap_);
indexes[i]->setAccessPathAnalysis(accessPath);
accessPaths_.insert(accessPath);
// if index is Unique add to uniqueIndexes_
// have to consider base table also
if((indexes[i]->isUniqueIndex()) ||
(indexes[i]->isClusteringIndex() &&
(indexes[i]->getPrimaryTableDesc()->getPrimaryKeyColumns().entries()>0)))
uniqueIndexes_.insert(accessPath);
// Set AccessPathAnalysis for base table.
if (indexes[i]->isClusteringIndex())
setApaForBaseTable(accessPath);
}
}
// Populate indexOnlyAccessPaths_: the subset of accessPaths_ whose
// index columns cover everything the scan needs (outputs, VEG base
// columns, and local predicates), so the scan could be satisfied
// from the index alone.
void TableAnalysis::initIndexOnlyAccessPaths()
{
// Get the expression from node analysis.
Scan *scanExpr = (Scan *)getNodeAnalysis()->getOriginalExpr();
// Only do this if the operator is REL_SCAN.
// (The cast above is only dereferenced as a Scan after this check.)
if (scanExpr->getOperatorType() != REL_SCAN)
return;
GroupAttributes *groupAttr = scanExpr->getGroupAttr();
// All the value ids that are required by the scan.
ValueIdSet requiredValueIds(groupAttr->getCharacteristicOutputs());
// Add all the base cols that contribute to VEGPredicates.
scanExpr->addBaseColsFromVEGPreds(requiredValueIds);
// Add local predicates since they are also required.
requiredValueIds += getLocalPreds();
// Test each access path for coverage of requiredValueIds.
for (CollIndex i=0; i<accessPaths_.entries(); i++)
{
ValueIdSet referencedInputs;
ValueIdSet coveredSubexpr;
ValueIdSet unCoveredExpr;
GroupAttributes indexOnlyGA;
NABoolean indexOnlyScan;
IndexDesc *index = accessPaths_[i]->getIndexDesc();
// make group attributes for an index scan
indexOnlyGA.addCharacteristicOutputs(index->getIndexColumns());
indexOnlyGA.addCharacteristicOutputs(scanExpr->getExtraOutputColumns());
// Check if the index covers all required values.
// There is additional information that is returned but not used here
// (referencedInputs/coveredSubexpr/unCoveredExpr are out-params).
indexOnlyScan =
requiredValueIds.isCovered(groupAttr->getCharacteristicInputs(),
indexOnlyGA,
referencedInputs,
coveredSubexpr,
unCoveredExpr);
if (indexOnlyScan)
{
// If all the required value ids are covered, add this to the list
// of index-only access paths.
indexOnlyAccessPaths_.insert(accessPaths_[i]);
}
}
}
// Final pass of table analysis, run after per-column analysis:
//  1. expands the scan's characteristic inputs (VEG refs -> members),
//  2. aggregates per-column results into connectedCols_/Tables_,
//     referencingPreds_ and vegPreds_,
//  3. prunes localPreds_ of VEG preds that cannot be evaluated
//     locally, computing constCols_ along the way,
//  4. optionally (CQD COMP_BOOL_23) initializes index-only paths.
void TableAnalysis::finishAnalysis()
{
// Characteristic inputs of the scan node for this table.
ValueIdSet inputsToScan;
inputsToScan = getNodeAnalysis()->
getOriginalExpr()->
getGroupAttr()->
getCharacteristicInputs();
// Expand every VEG reference among the inputs into all of its member
// values, so membership tests below see the underlying columns.
ValueIdSet scanInputs;
for (ValueId inputVid = inputsToScan.init();
inputsToScan.next(inputVid);
inputsToScan.advance(inputVid) )
{
if(inputVid.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE)
{
const ItemExpr * vegRef = inputVid.getItemExpr();
scanInputs +=
((VEGReference *)vegRef)->getVEG()->getAllValues();
}
else
scanInputs += inputVid;
}
// Rebuild connectivity info by aggregating over all used columns.
connectedCols_.clear();
connectedTables_.clear();
ColAnalysis* col = NULL;
for (ValueId c = usedCols_.init();
usedCols_.next(c);
usedCols_.advance(c))
{
col = c.colAnalysis();
col->finishAnalysis();
connectedCols_ += col->getConnectedCols();
connectedTables_ += col->getConnectedTables();
referencingPreds_ += col->getReferencingPreds();
vegPreds_ += col->getVegPreds();
}
// assert that connectedCols_ and connectedTables_ does
// not contain self.
// Another consistency check:
// we can recompute connectedCols_ and connectedTables_
// here based on the preds and assert if different that
// above computation. In Debug code only.
// Now finalize local preds and compute constCols_
// localVegs = VEG preds currently believed to be local.
ValueIdSet localVegs = localPreds_;
localVegs.intersectSet(vegPreds_);
for (ValueId veg = localVegs.init();
localVegs.next(veg);
localVegs.advance(veg))
{
// a VegPred is considered local if it has a constant value
// or if it equates 2 or more columns from this table
// first lets find the columns from this table that are in this veg
ValueIdSet myColsInVeg = veg.predAnalysis()->getVegConnectedCols();
myColsInVeg.intersectSet(usedCols_);
// Now let us see if this veg reference any const expression.
// (I am thinking of caching the fact that veg is constant in
// PredAnalysis in future).
const ItemExpr* pred = veg.getItemExpr();
const VEG * predVEG = ((VEGPredicate *)pred)->getVEG();
ValueIdSet vegGroup;
vegGroup += predVEG->getAllValues();
if (vegGroup.referencesAConstExpr())
{
// Certainly a local pred.
// also add tableColsInVeg to constCols.
constCols_ += myColsInVeg;
continue;
}
if (myColsInVeg.entries() > 1)
{
// do nothing, this is a local Pred since it is connecting
// two or more of my columns. The fact that it may be connecting
// to other tables as well is fine.
continue;
}
// A VEG that reaches one of the scan's (expanded) inputs can still
// be evaluated at the scan; intersectSet mutates vegGroup, which is
// not used afterwards.
if (vegGroup.intersectSet(scanInputs).entries())
continue;
// Fail both conditions. Remove from localPreds_.
// This predicate can't be evaluated locally.
localPreds_ -= veg;
}
// Add a column to constCols_ if it has an equality predicate
// against a constant that did not become a VEG e.g. c1 = extract_month(10212009);
for (ValueId colId= usedCols_.init(); usedCols_.next(colId); usedCols_.advance(colId))
{
ColAnalysis* colAn = QueryAnalysis::Instance()->getColAnalysis(colId);
if(colAn->getEqualityCoveringPreds().entries())
constCols_+=colId;
}
// Initialize index only access paths (guarded by CQD COMP_BOOL_23).
if(CmpCommon::getDefault(COMP_BOOL_23) == DF_ON)
initIndexOnlyAccessPaths();
return;
}
// Decide whether compressed histograms remain viable for the query.
// Compression is disabled (globally, via QueryAnalysis) as soon as a
// used column appears under an OR in a non-local, non-VEG referencing
// predicate that compares that column against a constant — such
// predicates need full histogram detail.
void TableAnalysis::checkIfCompressedHistsViable()
{
QueryAnalysis *qa = QueryAnalysis::Instance();
// Nothing to do if compression is already ruled out.
if(!qa || !qa->isCompressedHistsViable())
return;
NABoolean okToCompress = TRUE;
for (ValueId c = usedCols_.init();
usedCols_.next(c);
usedCols_.advance(c))
{
// Stop early once disqualified, or when the feature CQD is off.
if(!okToCompress || (CmpCommon::getDefault(COMP_BOOL_25) == DF_OFF))
break;
ColAnalysis * colAnalysis = c.colAnalysis();
if(colAnalysis)
{
// Examine only referencing preds that are neither local nor VEG.
ValueIdSet localPreds = colAnalysis->getLocalPreds();
ValueIdSet vegPreds = colAnalysis->getVegPreds();
ValueIdSet refPreds = colAnalysis->getReferencingPreds();
refPreds.subtractSet(localPreds);
refPreds.subtractSet(vegPreds);
// NOTE: loop variable iterates over predicate ValueIds despite
// its name "col".
for (ValueId col = refPreds.init();
refPreds.next(col);
refPreds.advance(col))
{
if(!okToCompress)
break;
ItemExpr *tempIE = col.getItemExpr();
// Only OR predicates are of interest here.
if(tempIE->getOperatorType() != ITM_OR)
continue;
// Collect the constant-comparison subexpressions under the OR.
ValueIdSet constExprs;
tempIE->accumulateConstExprs(constExprs);
for (ValueId constExprId = constExprs.init();
constExprs.next(constExprId);
constExprs.advance(constExprId))
{
// Only simple comparison operators disqualify compression.
OperatorTypeEnum op = constExprId.getItemExpr()->getOperatorType();
if( (op != ITM_EQUAL) &&
(op != ITM_NOT_EQUAL) &&
(op != ITM_LESS) &&
(op != ITM_LESS_EQ) &&
(op != ITM_GREATER) &&
(op != ITM_GREATER_EQ))
continue;
// Disqualify if the comparison actually references this column.
if(constExprId.getItemExpr()->referencesTheGivenValue(c))
{
okToCompress = FALSE;
break;
}
}
}
}
}
// Record the decision globally for the whole query.
if(!okToCompress)
qa->disableCompressedHistsViable();
}
// VEG predicates shared between this table and the given column.
ValueIdSet TableAnalysis::getConnectingVegPreds(ColAnalysis & col) const
{
  ValueIdSet shared(vegPreds_);
  shared.intersectSet(col.getVegPreds());
  return shared;
}
// VEG predicates shared between this table and another table.
ValueIdSet TableAnalysis::getConnectingVegPreds(TableAnalysis & other) const
{
  ValueIdSet shared(vegPreds_);
  shared.intersectSet(other.getVegPreds());
  return shared;
}
////////////////////////////////////////
// Get all JBBCs that are connected to this set of columns and the join preds
// This table must be JBBC
// For each column in `cols`, collect (a) its local predicates into
// localPreds, (b) the JBBCs from `jbbcs` it joins to (result) along
// with the connecting predicates (joinPreds). predCols, if supplied,
// accumulates every column that contributed either kind of predicate.
CANodeIdSet TableAnalysis::getJBBCsConnectedToCols(const CANodeIdSet & jbbcs,
const ValueIdSet & cols,
ValueIdSet & joinPreds /*OUT*/,
ValueIdSet & localPreds /*OUT*/,
ValueIdSet * predCols /*OUT*/ /*could be either join or local pred*/)
{
CANodeIdSet result;
// Reset all output parameters before accumulating.
localPreds.clear();
joinPreds.clear();
if (predCols)
predCols->clear();
for (ValueId col = cols.init();
cols.next(col);
cols.advance(col))
{
ColAnalysis* colAn = col.colAnalysis();
if (!colAn) continue; // no preds on this column
// Local predicates contributed by this column.
const ValueIdSet & colLocalPreds = colAn->getLocalPreds();
if (colLocalPreds.entries())
{
localPreds += colLocalPreds;
if (predCols)
*predCols += col;
}
// Join predicates connecting this column to each candidate JBBC.
for (CANodeId jbbc = jbbcs.init();
jbbcs.next(jbbc);
jbbcs.advance(jbbc))
{
//ValueIdSet currentJoinPreds = colAn->getConnectingPreds(jbbc.getNodeAnalysis()->getJBBC());
ValueIdSet currentJoinPreds = colAn->getAllConnectingPreds(jbbc.getNodeAnalysis()->getJBBC());
if (currentJoinPreds.entries() > 0)
{
result.insert(jbbc);
joinPreds += currentJoinPreds;
if (predCols)
*predCols += col;
}
}
}
return result;
};
// Get the JBBCs that are connected to the maximum prefix size in the given column list
// This table must be JBBC
// Walk `cols` in order and, for each column of the prefix, collect the
// JBBCs it joins to plus the join/local predicates involved. The
// prefix ends at the first column that contributes neither a constant
// local predicate nor a join to one of `jbbcs`. prefixSize returns the
// length of the usable prefix.
CANodeIdSet TableAnalysis::getJBBCsConnectedToPrefixOfList(const CANodeIdSet & jbbcs,
const ValueIdList & cols,
Lng32 & prefixSize /*OUT*/,
ValueIdSet & joinPreds /*OUT*/,
ValueIdSet & localPreds /*OUT*/)
{
CANodeIdSet result;
// Reset all output parameters before accumulating.
prefixSize = 0;
localPreds.clear();
joinPreds.clear();
for (CollIndex i = 0; i < cols.entries(); i++)
{
// Set to TRUE if this column contributes any usable predicate.
NABoolean continuePrefix = FALSE;
ColAnalysis* colAn = cols[i].colAnalysis();
if (!colAn) break; // no preds on this column
// xxx should change this to positioning local preds post EAP
// Local preds only extend the prefix when the column is constant
// (i.e. usable for key positioning).
const ValueIdSet & colLocalPreds = colAn->getLocalPreds();
if (colLocalPreds.entries() &&
constCols_.contains(colAn->getColumnId()))
{
localPreds += colLocalPreds;
continuePrefix = TRUE;
}
// Joins to any of the candidate JBBCs also extend the prefix.
for (CANodeId jbbc = jbbcs.init();
jbbcs.next(jbbc);
jbbcs.advance(jbbc))
{
//ValueIdSet currentJoinPreds = colAn->getConnectingPreds(jbbc.getNodeAnalysis()->getJBBC());
ValueIdSet currentJoinPreds = colAn->getAllConnectingPreds(jbbc.getNodeAnalysis()->getJBBC());
if (currentJoinPreds.entries() && result.legalAddition(jbbc))
{
result.insert(jbbc);
joinPreds += currentJoinPreds;
continuePrefix = TRUE;
}
}
// Stop at the first column that contributed nothing.
if (continuePrefix)
prefixSize++;
else
break;
}
return result;
};
// Get the columns of this table that are connected to the given JBBCs
// via equijoin predicates (VEG preds plus non-VEG equality preds).
// This table must be a JBBC.
// Returns the columns of this table that participate in equijoins with
// any of the given JBBCs. Two sources are combined: VEG join preds
// between this table's JBBSubset and the given set, and non-VEG
// equality connections recorded per column (e.g. t1.a = t2.b + 1).
ValueIdSet TableAnalysis::getColsConnectedViaEquiJoinPreds(const CANodeIdSet & jbbcs)
{
ValueIdSet result;
QueryAnalysis* qa = QueryAnalysis::Instance();
// Build the two JBBSubsets whose join preds we want.
CANodeIdSet joinedNodes = jbbcs;
CANodeIdSet mySelf(this->getNodeAnalysis()->getId());
JBBSubset * joinedJBBCs = joinedNodes.jbbcsToJBBSubset();
JBBSubset * mySubset = mySelf.jbbcsToJBBSubset();
// get all join preds in this JBBSubset
ValueIdSet equiJoinPreds = joinedJBBCs->joinPredsWithOther(*mySubset);
// narrow down join preds to veg preds
equiJoinPreds.intersectSet(getVegPreds());
// table columns involved in current query
ValueIdSet tableCols = getTableDesc()->getColumnList();
// iterate over join preds, for each pred
// get the column from this table
for (ValueId joinPred = equiJoinPreds.init();
equiJoinPreds.next(joinPred);
equiJoinPreds.advance(joinPred))
{
PredAnalysis * predAnalysis = joinPred.predAnalysis();
// columns of this table referenced by the predicate
ValueIdSet predRefCols = predAnalysis->getReferencedCols();
predRefCols.intersectSet(tableCols);
result.insert(predRefCols);
}
// add columns connected via non-VEG equijoin predicates
// e.g. t1.a in predicate t1.a = t2.b + 1
for (ValueId column = usedCols_.init();
usedCols_.next(column);
usedCols_.advance(column))
{
ColAnalysis * ca = qa->getColAnalysis(column);
CANodeIdSet equalityConnectedJBBCs;
if(ca)
equalityConnectedJBBCs = ca->getEqualityConnectedJBBCs();
// keep the column only if its equality connections reach the
// requested JBBCs
equalityConnectedJBBCs.intersectSet(jbbcs);
if(equalityConnectedJBBCs.entries())
result.insert(column);
}
return result;
};
// get equality Join predicates connected to the given CANodeID
// jbbc should be a table
// Returns the VEG predicates joining this table to the given node.
// The node must be a table that is connected to this one; otherwise
// an empty set is returned.
ValueIdSet TableAnalysis::getColsConnectedViaVEGPreds(CANodeId jbbc)
{
  ValueIdSet connectingPreds;

  // The other node must appear among our connected tables.
  CANodeIdSet myConnectedTables = getConnectedTables();
  if (!myConnectedTables.contains(jbbc))
    return connectingPreds;

  // ... and it must itself be a table.
  TableAnalysis * otherTable = jbbc.getNodeAnalysis()->getTableAnalysis();
  if (!otherTable)
    return connectingPreds;

  // VEG preds shared by the two tables.
  connectingPreds.insert(getConnectingVegPreds(*otherTable));

  // Restrict to this table's VEG preds (getConnectingVegPreds already
  // intersects with them, so this is a defensive no-op filter).
  connectingPreds.intersectSet(getVegPreds());
  return connectingPreds;
}
// Get the equality factor for the given set of columns. The equality
// factor is defined as the base UEC of the given column set divided by
// the base row count. For multiple columns the multi-column UEC is
// used. If no stats exist for this set of columns, the equality factor
// is zero.
CostScalar TableAnalysis::getEqualityVal(ValueIdSet joinCols)
{
  // Cross product: no join columns means no equality factor.
  if (joinCols.entries() == 0)
    return csZero;

  const CostScalar baseRowCount = getCardinalityOfBaseTable();
  const CostScalar baseUec = getBaseUec(joinCols);

  // csMinusOne signals that no stats exist for this column set.
  if (baseUec == csMinusOne)
    return csZero;

  // equality factor = UEC / row count
  return baseUec / baseRowCount;
}
// UEC of the given column set
// UEC of the given column set, looked up in the base table's
// multi-column UEC list. Returns csMinusOne when no UEC list exists.
// If the lookup fails but the columns form a unique index, the base
// table cardinality is used as the UEC.
CostScalar TableAnalysis::getBaseUec(const ValueIdSet & columns)
{
const MultiColumnUecList * MCUecOfCurJoinPred = \
getStatsOfBaseTable()->getColStats().getUecList();
if (!MCUecOfCurJoinPred)
return csMinusOne;
// Reduce the input to base columns of this table that the query uses.
ValueIdSet baseColSet;
columns.findAllReferencedBaseCols(baseColSet);
// get all join columns of this table
baseColSet.intersectSet(getUsedCols());
// it could be single or multi-col UEC depending on the number of columns passed
CostScalar baseUec = MCUecOfCurJoinPred->lookup(baseColSet);
// Fallback for a failed lookup: a unique column set has one distinct
// value per row, so UEC == base cardinality.
if (baseUec <= 0)
{
TableDesc * tableDesc = getTableDesc();
// The flag is passed to indicate if stats should be used to check for uniqueness
// too. That is define unique if rowcount == uec
if (baseColSet.doColumnsConstituteUniqueIndex(tableDesc, FALSE) )
baseUec = getCardinalityOfBaseTable();
}
return baseUec;
}
// Compute the local predicates on this table that references any of these columns
// of the table
// Collect the local predicates of this table that reference any of the
// given columns. If colsWithLocalPreds is supplied it receives every
// column that contributed at least one predicate.
ValueIdSet TableAnalysis::getLocalPredsOnColumns(const ValueIdSet & cols,
                                                 ValueIdSet * colsWithLocalPreds /*OUT*/)
{
  // One other way of doing this is to get the table local preds and
  // then select from it the preds that reference cols in this set.
  ValueIdSet foundPreds;

  if (colsWithLocalPreds)
    colsWithLocalPreds->clear();

  for (ValueId colId = cols.init(); cols.next(colId); cols.advance(colId))
  {
    ColAnalysis * analysis = colId.colAnalysis();
    if (!analysis)
      continue;  // no preds on this column

    const ValueIdSet & predsOnCol = analysis->getLocalPreds();
    if (predsOnCol.entries() == 0)
      continue;

    foundPreds += predsOnCol;
    if (colsWithLocalPreds)
      *colsWithLocalPreds += colId;
  }

  // Sanity: everything found must be among the table's local preds.
  CMPASSERT(localPreds_.contains(foundPreds));
  return foundPreds;
}
// Compute the local predicates on this table that references a prefix of this
// column list. compute also the prefix size.
// Collect local predicates referencing a prefix of the given column
// list, and report the prefix length. Salt columns without predicates
// are skipped without terminating (or counting toward) the prefix.
ValueIdSet TableAnalysis::getLocalPredsOnPrefixOfList(const ValueIdList & cols,
                                                      Lng32 & prefixSize /*OUT*/)
{
  // xxx warning: at this point we do not distinguish between local
  // predicates and local predicates good for positioning. For example
  // a=c is local but not good for key positioning. Moreover a=c will
  // be in the local preds but it needs both a and c to execute, i.e.
  // it is local for the table but not the column. We need to introduce
  // positioning local preds for columns. This will be done in the
  // future.
  ValueIdSet prefixPreds;
  prefixSize = 0;

  for (CollIndex pos = 0; pos < cols.entries(); pos++)
  {
    ColAnalysis * analysis = cols[pos].colAnalysis();
    if (!analysis)
      break;  // no analysis => no preds; the prefix ends here

    const ValueIdSet & predsOnCol = analysis->getLocalPreds();
    if (predsOnCol.entries())
    {
      prefixPreds += predsOnCol;
      prefixSize++;
    }
    else if (!cols[pos].isSaltColumn())
    {
      // A non-salt column without predicates terminates the prefix.
      break;
    }
    // A salt column without predicates is skipped: the scan continues
    // but prefixSize is not incremented.
  }

  return prefixPreds;
}
// This method computes some basic stats about the table, and caches them
// Compute and cache basic statistics for this table (base stats,
// stats after local preds on the clustering-key prefix, record size),
// and update QueryAnalysis's notion of the largest table in the query
// (largest by cardinality-after-CK-preds * record size).
void TableAnalysis::computeTableStats()
{
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
ValueIdSet emptySet;
CANodeId id = getNodeAnalysis()->getId();
// Cache base-table stats (no predicates applied).
statsOfBaseTable_ = appStatMan->getStatsForCANodeId(id,(*GLOBAL_EMPTY_INPUT_LOGPROP),&emptySet);
// Cache stats after local preds on the clustering-key prefix.
statsAfterLocalPredsOnCKPrefix_ =
appStatMan->getStatsForLocalPredsOnCKPOfJBBC(id); // should change ASM method name
recordSizeOfBaseTable_ = tableDesc_->getNATable()->getRecordLength();
// Possibly promote this table to "largest table" of the query.
QueryAnalysis* queryAnalysis = QueryAnalysis::Instance();
TableAnalysis * largestTable = queryAnalysis->getLargestTable();
if(!largestTable)
queryAnalysis->setLargestTable(this);
else{
// Compare estimated data volume (rows * record size) against the
// current largest table.
CostScalar largestTableCardinality =
largestTable->getCardinalityAfterLocalPredsOnCKPrefix();
CostScalar largestTableRecordSize =
largestTable->getRecordSizeOfBaseTable();
CostScalar largestTableSize =
largestTableCardinality * largestTableRecordSize;
CostScalar tableCardinality =
getCardinalityAfterLocalPredsOnCKPrefix();
CostScalar tableRecordSize =
getRecordSizeOfBaseTable();
CostScalar tableSize =
tableCardinality * tableRecordSize;
if (tableSize > largestTableSize)
queryAnalysis->setLargestTable(this);
}
return;
}
// Base-table statistics, computed lazily on first use and cached in
// statsOfBaseTable_ thereafter.
EstLogPropSharedPtr TableAnalysis::getStatsOfBaseTable()
{
  if (statsOfBaseTable_ == NULL)
  {
    AppliedStatMan * statMan = QueryAnalysis::ASM();
    ValueIdSet noPreds;
    CANodeId nodeId = getNodeAnalysis()->getId();
    statsOfBaseTable_ =
      statMan->getStatsForCANodeId(nodeId,
                                   (*GLOBAL_EMPTY_INPUT_LOGPROP),
                                   &noPreds);
  }
  return statsOfBaseTable_;
}
// Estimated row count of the base table (stats are computed lazily).
CostScalar TableAnalysis::getCardinalityOfBaseTable()
{
  EstLogPropSharedPtr baseStats = getStatsOfBaseTable();
  return baseStats->getResultCardinality();
}
// Maximum cardinality estimate of the base table.
CostScalar TableAnalysis::getMaxCardinalityOfBaseTable()
{
  EstLogPropSharedPtr baseStats = getStatsOfBaseTable();
  return baseStats->getMaxCardEst();
}
// TRUE when this table and `other` are hash-partitioned the same way:
// same partitioning function type, table-hash partitioning, same
// number of partitions, same number of partitioning key columns, and
// each pair of corresponding key columns is connected (joined).
NABoolean TableAnalysis::hasMatchingHashPartitioning(TableAnalysis * other)
{
// get the index desc for me
const IndexDesc * myIndxDesc = getTableDesc()->getClusteringIndex();
// get the index desc for the other table
const IndexDesc * otherIndxDesc = other->getTableDesc()->getClusteringIndex();
// get my partitioning function
PartitioningFunction * myPartFunc = myIndxDesc->getNAFileSet()->getPartitioningFunction();
// get other table partition function
PartitioningFunction * otherPartFunc = otherIndxDesc->getNAFileSet()->getPartitioningFunction();
// return false if the partitioning type does not match
if( myPartFunc->getPartitioningFunctionType() !=
otherPartFunc->getPartitioningFunctionType())
return FALSE;
// return false if not hash partitioned
// (the types match, so checking one side suffices)
if ((!myPartFunc->isATableHashPartitioningFunction()))
return FALSE;
// get my number of partitions
Int32 myPartNum = myIndxDesc->
getNAFileSet()->
getCountOfPartitions();
// get other number of partitions
Int32 otherPartNum = otherIndxDesc->
getNAFileSet()->
getCountOfPartitions();
// return false if number of partitions don't match
if (myPartNum != otherPartNum)
return FALSE;
// get my partitioning key
// (safe cast: isATableHashPartitioningFunction() was checked above)
ValueIdList myPartKey = ((TableHashPartitioningFunction *) myPartFunc)->getKeyColumnList();
// get other partitioning key
ValueIdList otherPartKey = ((TableHashPartitioningFunction *) otherPartFunc)->getKeyColumnList();
// return false if number of partitioning key columns is different
if (myPartKey.entries() != otherPartKey.entries())
return FALSE;
// every corresponding key-column pair must be connected via preds
for (UInt32 i = 0; i < myPartKey.entries(); i++)
{
//
ColAnalysis * myColAnalysis = myPartKey[i].colAnalysis();
if(!myColAnalysis)
return FALSE;
if(!myColAnalysis->getConnectedCols().contains(otherPartKey[i]))
return FALSE;
}
return TRUE;
};
// TRUE when the given predicates (plus this table's local preds)
// cover the full key of some unique index via equality predicates.
// hasPrefix, if supplied, reports whether at least a key prefix was
// covered (value reflects the last index tested before returning).
NABoolean TableAnalysis::predsOnUnique(ValueIdSet& vidSet,
                                       NABoolean *hasPrefix)
{
  if (vidSet.entries() == 0)
    return FALSE;

  // Combine the caller's predicates with the table's local preds.
  ValueIdSet allPreds(vidSet);
  allPreds += localPreds_;

  // Succeed on the first unique index whose key is fully covered.
  for (CollIndex ux = 0; ux < uniqueIndexes_.entries(); ux++)
  {
    if (uniqueIndexes_[ux]->keyCoveredByEqualityPreds(allPreds, hasPrefix))
      return TRUE;
  }
  return FALSE;
}
// get a rough estimate of cost for doing a nested join
// number of probes = dataFlowFromEdge
// Rows of fact table that will be scanned = factTableRowsToScan
// 0 probes = Hash Join i.e. no nested join
// Rough nested-join data-access cost for this table:
//   cost = cPerProbe * probes
//        + cPerUnitSize * (rows scanned * record size / 1024)
//        + cPerRowReturned * (rows returned * returned row size)
// The three coefficients come from CQDs COMP_FLOAT_0/1/2.
// probes == 0 models a hash join (no nested-join probing).
CostScalar TableAnalysis::computeDataAccessCostForTable(
CostScalar probes,
CostScalar tableRowsToScan) const
{
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
CANodeId tableId = getNodeAnalysis()->getId();
// Cost coefficients, externalized as CQDs for tuning.
CostScalar costPerProbe ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_0));
CostScalar costPerUnitSize ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_1));
CostScalar costPerRowReturned ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_2));
//Record Size of the factTable
CostScalar tableRecordSize = getTableDesc()->
getNATable()->
getRecordLength();
// Volume of data scanned, in KB.
CostScalar subsetSizeKB = (tableRowsToScan * tableRecordSize)/1024;
// Estimated rows flowing out of the scan.
CostScalar tableRowsReturned = appStatMan->
getStatsForCANodeId(tableId)->
getResultCardinality();
RelExpr * tableScanExpr = getNodeAnalysis()->getOriginalExpr();
CostScalar sizeOfRowReturnedFromTable =
tableScanExpr->getGroupAttr()->getRecordLength();
CostScalar costForTable = (costPerProbe * probes) +
(costPerUnitSize * subsetSizeKB) +
(costPerRowReturned *
tableRowsReturned *
sizeOfRowReturnedFromTable);
return costForTable;
}
////////////////////////////////////////
// -----------------------------------------------------------------------
// Methods for class AccessPathAnalysis
// -----------------------------------------------------------------------
// Construct the analysis for one access path (index). Caches, as
// base-column ValueIds, the index's column list, key column list and
// partitioning key column list, plus the column set and key count.
AccessPathAnalysis::AccessPathAnalysis
(IndexDesc* indexDesc,
TableAnalysis* tableAnalysis,
CollHeap *outHeap):
indexDesc_(indexDesc),
tableAnalysis_(tableAnalysis),
accessPathKeyCount_(0),
heap_(outHeap)
{
if(indexDesc_)
{
// setup index cols list
ValueIdList indexCols = indexDesc_->getIndexColumns();
ValueIdList keyCols = indexDesc_->getIndexKey();
// Map each IndexColumn to its underlying base-column definition.
for(UInt32 i =0;
i < indexCols.entries();
i++)
{
ItemExpr *ie = indexCols[i].getItemExpr();
accessPathColList_.insert(((IndexColumn *) ie)->getDefinition());
}
// Same mapping for the key columns.
for(UInt32 i =0;
i < keyCols.entries();
i++)
{
ItemExpr *ie = keyCols[i].getItemExpr();
accessPathKeyCols_.insert(((IndexColumn *) ie)->getDefinition());
}
// the set of columns in the access path
accessPathColSet_.insertList(accessPathColList_);
// get the column count of the access path key
accessPathKeyCount_ = indexDesc_->getIndexKey().entries();
// setup part key cols list
ValueIdList partKeyCols = indexDesc_->getPartitioningKey();
for(UInt32 i =0;
i < partKeyCols.entries();
i++)
{
ItemExpr *ie = partKeyCols[i].getItemExpr();
accessPathPartKeyCols_.insert(((IndexColumn *) ie)->getDefinition());
}
}
}
// TRUE when every column the query uses from the base table is
// present in this index, i.e. the scan could be index-only.
NABoolean AccessPathAnalysis::isIndexOnly()
{
  // xxx Maybe the result of this method should be cached if it turns
  // out to be frequently asked.
  TableAnalysis * tblAnalysis =
    indexDesc_->getPrimaryTableDesc()->getTableAnalysis();
  return accessPathColSet_.contains(tblAnalysis->getUsedCols());
}
// How many leading columns of this access path (key columns when
// useKeyCols, otherwise all index columns) are covered by equality
// columns found in vidSet. With exactMatch, the count is returned
// only when it equals the size of vidSet; otherwise 0.
Int32 AccessPathAnalysis::numIndexPrefixCovered(const ValueIdSet& vidSet,
NABoolean exactMatch,
NABoolean useKeyCols)
{
// find all the equality columns
ValueIdSet equalityCols;
vidSet.findAllEqualityCols(equalityCols);
// Intersect the equality columns with the set of used columns
// to get the columns for only this table.
const ValueIdSet& usedCols =
indexDesc_->getPrimaryTableDesc()->getTableAnalysis()->getUsedCols();
// get the use equality columns
equalityCols.intersectSet((ValueIdSet&)usedCols);
// Get the list of index columns and check if a prefix is
// covered in the provided set.
Int32 numCoveredCols = useKeyCols ?
accessPathKeyCols_.prefixCoveredInSet(equalityCols) :
accessPathColList_.prefixCoveredInSet(equalityCols) ;
// NOTE(review): signed/unsigned comparison below (Int32 vs
// vidSet.entries()); appears benign since numCoveredCols >= 0.
if (numCoveredCols &&
((!exactMatch) || numCoveredCols == vidSet.entries()))
return numCoveredCols;
else
return 0;
}
// TRUE when all partitioning key columns of this access path are
// covered by equality columns found in vidSet (exactMatch requires
// the covered set to match the part key exactly).
NABoolean AccessPathAnalysis::isPartKeyCovered(const ValueIdSet& vidSet,
NABoolean exactMatch)
{
// find all the equality columns
ValueIdSet equalityCols;
vidSet.findAllEqualityCols(equalityCols);
// Intersect the equality columns with the set of used columns
// to get the columns for only this table.
const ValueIdSet& usedCols =
indexDesc_->getPrimaryTableDesc()->getTableAnalysis()->getUsedCols();
// get the used equality cols
// NOTE(review): uses intersect() where numIndexPrefixCovered() uses
// intersectSet() — confirm both have the same semantics.
equalityCols.intersect((ValueIdSet&)usedCols);
// check if part key cols are covered in the provided set.
return accessPathPartKeyCols_.allMembersCoveredInSet(equalityCols, exactMatch);
}
// TRUE when equality columns in vidSet cover the entire access-path
// key. When hasPrefix is supplied, the match runs against the key
// columns and *hasPrefix reports whether any key prefix was covered.
NABoolean AccessPathAnalysis::keyCoveredByEqualityPreds
(const ValueIdSet& vidSet, NABoolean *hasPrefix)
{
  // Match against key columns only when the caller asked for prefix
  // information (same TRUE/FALSE choice as hasPrefix ? TRUE : FALSE).
  const NABoolean useKeyCols = (hasPrefix != NULL);
  const Int32 coveredCount = numIndexPrefixCovered(vidSet, FALSE, useKeyCols);

  if (hasPrefix)
    *hasPrefix = (coveredCount > 0);

  return (coveredCount == accessPathKeyCount_) ? TRUE : FALSE;
}
// One-line debug summary: index name followed by its column list.
const NAString AccessPathAnalysis::getText() const
{
  NAString text("AccessPathAnalysis: \n");
  text += "Index: ";
  text += indexDesc_->getExtIndexName();
  text += " ";
  text += "Index Columns: ";
  text += valueIdSetGetText(indexDesc_->getIndexColumns());
  return text;
}
// Debug helper: write getText() to the given stream.
// prefix/suffix are part of the common print() signature but are
// intentionally unused here.
void AccessPathAnalysis::print (FILE *f,
                                const char * prefix,
                                const char * suffix) const
{
  // Use an explicit "%s" format: passing getText() directly as the
  // format string is undefined behavior if an index name ever
  // contains a '%' conversion specifier.
  fprintf (f, "%s", getText().data());
}
// -----------------------------------------------------------------------
// Methods for class JBBItem
// -----------------------------------------------------------------------
// Get all dependent predicates from other JBBItem on this JBBItem.
// Dependent predicates where this item supplies the successor side
// and `other` supplies the predecessor side.
ValueIdSet JBBItem::successorPredsOfOther(const JBBItem & other) const
{
  ValueIdSet deps(getPredsWithSuccessors());
  deps.intersectSet(other.getPredsWithPredecessors());
  return deps;
}
// Get all dependent predicates from this JBBItem on other JBBItem.
// Dependent predicates where this item supplies the predecessor side
// and `other` supplies the successor side.
ValueIdSet JBBItem::predecessorPredsOnOther(const JBBItem & other) const
{
  ValueIdSet deps(getPredsWithPredecessors());
  deps.intersectSet(other.getPredsWithSuccessors());
  return deps;
}
// -----------------------------------------------------------------------
// Methods for class JBBC
// -----------------------------------------------------------------------
// Set the join preds to this JBBC.
// This will also update predAnalysis of these preds of this JBBC
void JBBC::setJoinPreds(const ValueIdSet & joinPreds)
{
joinPreds_ = joinPreds;
for (ValueId pred = joinPreds_.init();
joinPreds_.next(pred);
joinPreds_.advance(pred))
{
// This method will create PredAnalysis for this value if none has
// been created so far.
PredAnalysis* predAnalysis =
QueryAnalysis::Instance()->findPredAnalysis(pred);
predAnalysis->addToReferencedJBBCs(getId());
}
}
// Not used - we are using the JBBSubset version of this
// Not used - we are using the JBBSubset version of this.
// Join predicates referenced by both JBBCs and fully covered by the
// pair of them together.
ValueIdSet JBBC::joinPredsWithOther(const JBBC & other) const
{
  // Predicates both JBBCs participate in.
  ValueIdSet sharedPreds = getJoinPreds();
  sharedPreds.intersectSet(other.getJoinPreds());

  // A two-element subset used for the coverage test.
  JBBSubset pairSubset;
  pairSubset.addJBBC(getId());
  pairSubset.addJBBC(other.getId());

  // Keep only the predicates evaluable by the pair alone.
  ValueIdSet covered;
  for (ValueId predId = sharedPreds.init();
       sharedPreds.next(predId);
       sharedPreds.advance(predId))
  {
    if (pairSubset.coversExpr(predId))
      covered += predId;
  }
  return covered;
}
// Record the dependency predicates of this JBBC:
//  - predsWithPredecessors_ (plus a list form and a parallel list of
//    predecessor JBBC sets, filled in elsewhere),
//  - constPredsWithPredecessors_ (those that evaluate to a constant),
//  - predsWithSuccessors_ (remaining dependency preds that reference
//    this JBBC's outputs).
void JBBC::setPredsWithDependencies(const ValueIdSet & predsWithDependencies,
const ValueIdSet & predsWithPredecessors)
{
// set predsWithPredecessors_
predsWithPredecessors_ += predsWithPredecessors;
predsWithPredecessorsList_ =
new (CmpCommon::statementHeap()) ValueIdList(predsWithPredecessors_);
// Pre-size the parallel predecessor-set list to match.
predecessorJBBCsList_ =
new (CmpCommon::statementHeap())
LIST(CANodeIdSet)(CmpCommon::statementHeap(),
predsWithPredecessorsList_->entries());
// set constPredsWithPredecessors_
// (non-strict constant folding: see doesExprEvaluateToConstant)
NABoolean strict = FALSE;
for (ValueId predId= predsWithPredecessors_.init();
predsWithPredecessors_.next(predId);
predsWithPredecessors_.advance(predId) )
{
ItemExpr * predExpr = predId.getItemExpr();
if(predExpr && predExpr->doesExprEvaluateToConstant(strict))
constPredsWithPredecessors_ += predId;
}
// set predsWithSuccessors_
// get characterisic outputs of JBBCs
const GroupAttributes * jbbcGA = getNodeAnalysis()->
getOriginalExpr()->
getGroupAttr();
ValueIdSet jbbcCharOutput = jbbcGA->getCharacteristicOutputs();
// include null-instantiated outputs (left-join case)
jbbcCharOutput += (ValueIdSet) nullInstantiatedOutput();
// Successor preds = dependency preds that are not predecessor preds
// and that reference this JBBC's outputs.
ValueIdSet potentialPredsWithSuccessors = predsWithDependencies;
potentialPredsWithSuccessors -= predsWithPredecessors_;
predsWithSuccessors_.
accumulateReferencingExpressions(potentialPredsWithSuccessors,
jbbcCharOutput);
}
// Get all JBBCs that are have no join predicate with this JBBC.
// JBBCs of this JBB that have no join predicate and no dependency
// relationship with this JBBC — joining with them would be a cross
// product.
CANodeIdSet JBBC::getJBBCsThatXProductWithMe() const
{
  // Start from every JBBC in the JBB, then remove myself and every
  // JBBC related to me.
  CANodeIdSet unrelated = jbb_->getMainJBBSubset().getJBBCs();
  unrelated.subtractElement(getId());
  unrelated.subtractSet(joinedJBBCs_);
  unrelated.subtractSet(successorJBBCs_);
  unrelated.subtractSet(predecessorJBBCs_);
  return unrelated;
}
// JBBCs joined to this one on (at least) the full primary key of the
// other table. Only inner-joined table JBBCs qualify (nodes with
// predecessors, i.e. left/semi-joined ones, are skipped). The result
// is computed once and cached.
CANodeIdSet JBBC::getJBBCsConnectedViaKeyJoins()
{
// Return the cached result if available.
if(jbbcsJoinedViaTheirKeyIsSet_)
return jbbcsJoinedViaTheirKey_;
CANodeIdSet joinedJBBCs = joinedJBBCs_;
CANodeIdSet jbbcsJoinedViaTheirKey;
for (CANodeId joinedJBBCNode = joinedJBBCs.init();
joinedJBBCs.next(joinedJBBCNode);
joinedJBBCs.advance(joinedJBBCNode) )
{
// Only table JBBCs can be key-joined.
TableAnalysis * tAnalysis = joinedJBBCNode.
getNodeAnalysis()->
getTableAnalysis();
if (!tAnalysis)
continue;
TableDesc * tDesc = tAnalysis->getTableDesc();
ValueIdSet primaryKey = tDesc->getPrimaryKeyColumns();
JBBC * joinedJBBC = joinedJBBCNode.
getNodeAnalysis()->
getJBBC();
// JBBC is joined via Left or Semi Join
if (joinedJBBC->getPredecessorJBBCs().entries())
continue;
// Qualify if the equijoin columns cover the other table's key.
ValueIdSet joinedCols =
tAnalysis->getColsConnectedViaEquiJoinPreds(getId());
if(joinedCols.contains(primaryKey))
jbbcsJoinedViaTheirKey += joinedJBBCNode;
}
// Cache for later calls.
jbbcsJoinedViaTheirKey_ = jbbcsJoinedViaTheirKey;
jbbcsJoinedViaTheirKeyIsSet_ = TRUE;
return jbbcsJoinedViaTheirKey_;
}
/* Do not inspect this method */
// Returns a subset of getJoinedJBBCs that are joined with this JBBC
// on this particular column
// Unimplemented stub: asserts via xxx() if ever called.
// Intended to return the subset of getJoinedJBBCs that are joined
// with this JBBC on this particular column.
CANodeIdSet JBBC::getJoinedJBBCsOnThisColumn(const ValueId & col,
ValueIdSet& joiningPreds /*OUT*/) const
{
xxx();
return CANodeIdSet(heap_);
}
/* Do not inspect this method */
// Returns a subset of getJoinedJBBCs that are joined with this JBBC
// on these particular columns
// Unimplemented stub: asserts via xxx() if ever called.
// Intended to return the subset of getJoinedJBBCs that are joined
// with this JBBC on these particular columns.
CANodeIdSet JBBC::getJoinedJBBCsOnTheseColumns(const ValueIdSet & cols,
ValueIdSet & joiningPreds /*OUT*/) const
{
xxx();
return CANodeIdSet(heap_);
}
// A "guaranteed equalizer" is a table JBBC that is the right child of
// a left join, has exactly one predecessor, no other join relations,
// and whose left-join predicates cover its full primary key — so the
// join cannot change the row count of the other side.
// Result is cached in isGuaranteedEqualizer_ (-1 = not yet computed).
NABoolean JBBC::isGuaranteedEqualizer()
{
if(isGuaranteedEqualizer_ == -1)
{
// default: not an equalizer
isGuaranteedEqualizer_ = 0;
if(parentIsLeftJoin() &&
!getSuccessorJBBCs().entries() &&
!getJoinedJBBCs().entries() &&
(getPredecessorJBBCs().entries()==1))
{
// columns referenced by the left-join predicates
ValueIdSet leftJoinPreds = getPredsWithPredecessors();
ValueIdSet leftJoinedCols;
leftJoinPreds.findAllReferencedBaseCols(leftJoinedCols);
// only table JBBCs can qualify
TableAnalysis * leftJoinedTA = getNodeAnalysis()->
getTableAnalysis();
if(leftJoinedTA)
{
TableDesc * leftJoinedTDesc = leftJoinedTA->getTableDesc();
ValueIdSet primaryKeyCols = leftJoinedTDesc->getPrimaryKeyColumns();
// equalizer iff the join columns cover the full primary key
if(leftJoinedCols.contains(primaryKeyCols))
isGuaranteedEqualizer_ = 1;
}
}
}
return isGuaranteedEqualizer_;
}
// TRUE when joining this JBBC cannot increase the row count of the
// other side: either its parent join is a (anti-)semi join, or the
// equality columns of its join predicates are unique within this
// JBBC. Result is cached in hasNonExpandingJoin_ (-1 = not computed).
NABoolean JBBC::hasNonExpandingJoin()
{
  // Return the cached answer when available.
  if(hasNonExpandingJoin_ != -1)
    return hasNonExpandingJoin_;

  // A semi or anti-semi join never produces more than one match per
  // outer row, so it is non-expanding by construction.
  // (Fix: cache this result too — the original returned without
  // setting hasNonExpandingJoin_, recomputing on every later call.)
  Join * jbbcParentJoin = getOriginalParentJoin();
  if(jbbcParentJoin &&
     (jbbcParentJoin->isSemiJoin() ||
      jbbcParentJoin->isAntiSemiJoin()))
  {
    hasNonExpandingJoin_ = 1;
    return TRUE;
  }

  // Gather all join predicates, including dependency preds.
  ValueIdSet joinPreds = getJoinPreds();
  joinPreds += getPredsWithPredecessors();

  NodeAnalysis * jbbcNodeAnalysis = getNodeAnalysis();
  RelExpr * jbbcExpr = jbbcNodeAnalysis->getOriginalExpr();
  GroupAttributes * jbbcGA = jbbcExpr->getGroupAttr();

  // Collect the VEG references of all VEG join predicates.
  ValueIdSet equalityCols;
  for (ValueId x = joinPreds.init();
       joinPreds.next(x);
       joinPreds.advance(x) )
  {
    ItemExpr * ie = x.getItemExpr();
    OperatorType ot = ie->getOperator();
    if(ot == ITM_VEG_PREDICATE)
    {
      VEGPredicate * vegPred = (VEGPredicate *) ie;
      equalityCols.insert(vegPred->getVEG()->getVEGReference()->getValueId());
    }
  }

  // add predicates that did not become a VEG and are against a single JBBC
  // this should capture something like t1.a = t2.a + 1
  // this will miss something like t1.a = t2.a + t3.a
  if(jbbcNodeAnalysis->getTableAnalysis())
  {
    equalityCols += jbbcNodeAnalysis->getTableAnalysis()->getEqualityConnectedCols();
  }

  // Non-expanding iff the equality columns are unique in this JBBC.
  hasNonExpandingJoin_ = jbbcGA->isUnique(equalityCols);
  return hasNonExpandingJoin_;
}
// Build a human-readable dump of this JBBC: node id, owning JBB,
// related JBBC sets and the original parent join. Used by print().
const NAString JBBC::getText() const
{
NAString result("JBBC # ");
result += istring(caNodeAnalysis_->getId()) + ":\n";
result += "Child of JBB#" + istring(jbb_->getJBBId()) + "\n";
result += "joinedJBBCs_ : " + joinedJBBCs_.getText();
result += "successorJBBCs_ : " + successorJBBCs_.getText();
result += "predecessorJBBCs_ : " + predecessorJBBCs_.getText();
// parent join may legitimately be absent
if (origParentJoin_)
result += "origParentJoin_ : " + origParentJoin_->getText();
else
result += "origParentJoin_ : NULL";
result += "\n";
return result;
}
// Debugger convenience wrapper: delegates to print() with its default
// arguments.
void JBBC::display() const
{
print();
}
// Print this JBBC's description (getText()) to the given file stream.
// prefix/suffix are accepted for signature consistency with other
// print() methods in this file but are currently unused.
void JBBC::print (FILE *f,
                  const char * prefix,
                  const char * suffix) const
{
  // BUGFIX: pass the text as an argument rather than as the format
  // string; a '%' inside the text would otherwise make fprintf read
  // nonexistent varargs (classic format-string bug).
  fprintf (f, "%s", getText().data());
}
// -----------------------------------------------------------------------
// Methods for class JBBSubset
// -----------------------------------------------------------------------
// Return the join predicates (no dependency preds) connecting this
// JBBSubset with the other JBBSubset: predicates both sides regard as
// join predicates AND that are covered by the union of the two subsets.
ValueIdSet JBBSubset::joinPredsWithOther(const JBBSubset & other) const
{
  // candidate predicates: join preds common to both subsets
  ValueIdSet candidates = getJoinPreds();
  candidates.intersectSet(other.getJoinPreds());

  // the combined subset is what must cover each candidate
  JBBSubset unionSubset;
  unionSubset.addJBBCs(jbbcs_);
  unionSubset.addJBBCs(other.getJBBCs());

  ValueIdSet covered;
  for (ValueId pred = candidates.init();
       candidates.next(pred);
       candidates.advance(pred))
  {
    if (unionSubset.coversExpr(pred))
      covered += pred;
  }
  return covered;
}
// Return the preferred Join implementation for this subset as computed
// during MultiJoin logical-property synthesis.
// Returns NULL for subsets with fewer than two JBBCs (no join possible).
Join * JBBSubset::getPreferredJoin()
{
if(jbbcs_.entries() < 2)
return NULL;
// use the cached MultiJoin for this subset if one exists ...
MultiJoin * subsetMJ = getSubsetMJ();
if(!subsetMJ)
{
// ... otherwise carve one out of the JBB's main MultiJoin (reusing
// child groups) and synthesize its logical properties
CANodeIdSet allJBBCs = getJBB()->getMainJBBSubset().getJBBCs();
JBBSubset * mainJBBSubset = allJBBCs.jbbcsToJBBSubset();
MultiJoin * jbbMJ = mainJBBSubset->getSubsetMJ();
NABoolean reUseMJ = TRUE;
subsetMJ = jbbMJ->createSubsetMultiJoin(*this, reUseMJ);
subsetMJ->synthLogPropWithMJReuse();
}
Join * preferredJoin = subsetMJ->getPreferredJoin();
// the preferred join should have been produced during synthesis;
// if not (debug-only assert), retry synthesis before returning
if(!preferredJoin->isJoinFromMJSynthLogProp())
{
CCMPASSERT(FALSE);
subsetMJ->synthLogPropWithMJReuse();
}
return subsetMJ->getPreferredJoin();
}
// Return the union of the member JBBCs and the group-by node (if any).
CANodeIdSet JBBSubset::getJBBCsAndGB() const
{
  // May end up being cached as a member, depending on usage frequency.
  CANodeIdSet members(jbbcs_);
  if (gb_ != NULL_CA_ID)
    members.insert(gb_);
  return members;
}
// Return the JBBCs of this JBB that have no join predicate (and no
// dependency edge) with this JBBSubset, i.e. joining any of them to
// this subset would be a cross product.
CANodeIdSet JBBSubset::getJBBCsThatXProductWithMe() const
{
  // start with every JBBC in the JBB ...
  CANodeIdSet crossProductPartners = getJBB()->getMainJBBSubset().getJBBCs();
  // ... then remove our own members and everything connected to us
  crossProductPartners -= getJBBCs();
  crossProductPartners -= getJoinedJBBCs();
  crossProductPartners -= getSuccessorJBBCs();
  crossProductPartners -= getPredecessorJBBCs();
  return crossProductPartners;
}
// Copy the JBBCs and GB from the other JBBSubset without copying its
// heap_ affiliation.
void JBBSubset::copySubsetMembers(const JBBSubset & other)
{
  // Copy the membership. The cached analysis pointer is copied too:
  // if it is NULL that is equivalent to clearing it, and if it is
  // already computed we get the result for free.
  jbbcs_             = other.jbbcs_;
  gb_                = other.gb_;
  jbbSubsetAnalysis_ = other.jbbSubsetAnalysis_;
  // heap_ is deliberately NOT copied: this subset keeps its own heap.
}
// Subtract the content (JBBCs and GB) of the other JBBSubset from this
// one. Side-effects this JBBSubset.
void JBBSubset::subtractSubset(const JBBSubset & other)
{
  jbbcs_ -= other.getJBBCs();
  // drop the group-by only if it is the one being subtracted
  if (gb_ == other.getGB())
    gb_ = NULL_CA_ID;
  // membership changed => cached analysis is stale
  clearAnalysis();
}
// add the content (JBBCs and GB) of other JBBSubset to this
// JBBSubset. This method side-effect this JBBSubset.
void JBBSubset::addSubset(const JBBSubset & other)
{
jbbcs_ += other.getJBBCs();
CANodeId othersGB = other.getGB();
if (othersGB != NULL_CA_ID)
{
CMPASSERT((gb_ == NULL_CA_ID) || (gb_ == othersGB))
gb_ = othersGB;
}
clearAnalysis();
}
// Does this JBBSubset have a mandatory cross product? If so, any join
// order for this subset will contain a cross product.
// xxx: this function will be moved to JBBSubsetAnalysis
// Note: slated to retire, since numConnectedSubgraphs() does its job.
NABoolean JBBSubset::hasMandatoryXP() const
{
  // fewer than two JBBCs can never force a cross product
  if (jbbcs_.entries() < 2)
    return FALSE;

  // Breadth-first traversal of the join graph starting at an arbitrary
  // member. If some member is still unreached when the frontier dries
  // up, the subset is disconnected => mandatory cross product.
  CANodeIdSet notReached(jbbcs_);
  CANodeIdSet frontier(notReached.getFirst());
  CANodeId visiting;
  while ((frontier.entries() != 0) && (notReached.entries() != 0))
  {
    visiting = frontier.getFirst();
    notReached -= visiting;

    // neighbors of the visited node, restricted to members of this
    // subset that have not already been reached
    CANodeIdSet neighbors(visiting.getNodeAnalysis()->getJBBC()->getJoinedJBBCs());
    neighbors.intersectSet(notReached);
    notReached -= neighbors;

    // queue the new neighbors, retire the visited node
    frontier += neighbors;
    frontier -= visiting;
  }

  return (notReached.entries() != 0) ? TRUE : FALSE;
}
// Calculate number of disjunct subgraphs in this JBBSubset
// xxx: this function will be moved to JBBSubsetAnalysis
//
// Counts connected components of the subset's join graph via a
// breadth-first traversal. followSuccessors / pullInPredecessors
// control whether successor / predecessor dependency edges also count
// as connections in addition to regular join edges.
Lng32 JBBSubset::numConnectedSubgraphs(NABoolean followSuccessors,
NABoolean pullInPredecessors) const
{
// an empty subset has no subgraphs
if (jbbcs_.entries() < 1)
return 0;
Lng32 numSubgraphs = 1;
CANodeIdSet available(jbbcs_);
CANodeId frontNode;
// pick a node that does not have predecessors
// NOTE(review): if every member has predecessors the loop falls
// through without break, leaving frontNode at the iterator's end
// value -- presumably CANodeIdSet handles that id benignly; confirm.
for(frontNode = available.init();
available.next(frontNode);
available.advance(frontNode))
{
JBBC * frontNodeJBBC = frontNode.getNodeAnalysis()->getJBBC();
CANodeIdSet frontNodePredecessors =
frontNodeJBBC->getPredecessorJBBCs();
if(!frontNodePredecessors.entries())
break;
}
CANodeIdSet frontNodes(frontNode);
CANodeId currNode;
// breadth-first traversal; every time the frontier empties while
// unvisited nodes remain, one subgraph is complete and a new one starts
while (available.entries() != 0)
{
currNode = frontNodes.getFirst();
available -= currNode;
// neighbors via join edges, plus optional dependency edges
CANodeIdSet newNodes(currNode.getNodeAnalysis()->getJBBC()->getJoinedJBBCs());
if(followSuccessors)
newNodes += currNode.getNodeAnalysis()->getJBBC()->getSuccessorJBBCs();
if(pullInPredecessors)
newNodes += currNode.getNodeAnalysis()->getJBBC()->getPredecessorJBBCs();
// execlude nodes that are already tested and nodes that are outside
// this JBBSubset jbbcs
newNodes.intersectSet(available);
available -= newNodes;
// now add the newNodes to frontNodes (nodes to be tested) and
// remove currNode from frontNodes;
frontNodes += newNodes;
frontNodes -= currNode;
// Check to see if we finished all nodes in current subgraph
// In that case start a new subgraph, unless this is the last
// subgraph i.e available set is empty.
if ((frontNodes.entries() == 0) && (available.entries() > 0))
{
frontNodes += available.getFirst();
numSubgraphs++;
}
}
return numSubgraphs;
}
// Partition this subset's JBBCs into connected subgraphs of the join
// graph; each element of the returned list is one subgraph.
// followSuccessors / pullInPredecessors control whether successor /
// predecessor dependency edges also count as connections.
// The result for the default flavor (followSuccessors=TRUE,
// pullInPredecessors=FALSE) is cached in connectedSubgraphs_.
NAList<CANodeIdSet*>* JBBSubsetAnalysis::getConnectedSubgraphs(NABoolean followSuccessors,
                                                               NABoolean pullInPredecessors)
{
  // the cached result is only valid for the default flavor
  if (connectedSubgraphs_ && followSuccessors && (!pullInPredecessors))
    return connectedSubgraphs_;

  NAList<CANodeIdSet*>* connectedSubgraphs =
    new (heap_) NAList<CANodeIdSet*>(heap_);

  CANodeIdSet available(jbbcs_);
  CANodeId frontNode;

  // prefer to start the traversal at a node that has no predecessors
  for(frontNode = available.init();
      available.next(frontNode);
      available.advance(frontNode))
  {
    JBBC * frontNodeJBBC = frontNode.getNodeAnalysis()->getJBBC();
    CANodeIdSet frontNodePredecessors =
      frontNodeJBBC->getPredecessorJBBCs();
    if(!frontNodePredecessors.entries())
      break;
  }

  CANodeIdSet frontNodes(frontNode);
  CANodeId currNode;
  CANodeIdSet currSubgraph;

  // Breadth-first traversal. Whenever the frontier empties while nodes
  // remain, the current subgraph is complete and a new one is started.
  while (available.entries() != 0)
  {
    currNode = frontNodes.getFirst();
    currSubgraph += currNode;
    available -= currNode;

    // neighbors via join edges, plus optional dependency edges
    CANodeIdSet newNodes(currNode.getNodeAnalysis()->getJBBC()->getJoinedJBBCs());
    if(followSuccessors)
      newNodes += currNode.getNodeAnalysis()->getJBBC()->getSuccessorJBBCs();
    if(pullInPredecessors)
      newNodes += currNode.getNodeAnalysis()->getJBBC()->getPredecessorJBBCs();

    // exclude nodes already visited and nodes outside this subset
    newNodes.intersectSet(available);
    available -= newNodes;

    // grow the current subgraph and advance the frontier
    currSubgraph += newNodes;
    frontNodes += newNodes;
    frontNodes -= currNode;

    // frontier exhausted but nodes remain => close this subgraph and
    // seed the next one
    if ((frontNodes.entries() == 0) && (available.entries() > 0))
    {
      frontNodes += available.getFirst();
      connectedSubgraphs->insert(new (heap_) CANodeIdSet(currSubgraph, heap_));
      currSubgraph.clear();
    }
  }
  // the last (possibly only) subgraph
  connectedSubgraphs->insert(new (heap_) CANodeIdSet(currSubgraph, heap_));

  // BUGFIX: only cache the default flavor. Previously the result was
  // cached whenever followSuccessors was TRUE, so a call with
  // pullInPredecessors=TRUE could poison the cache, and the lookup
  // above (which requires !pullInPredecessors) would later hand that
  // wrong partitioning to default-flavor callers.
  if (followSuccessors && !pullInPredecessors)
    connectedSubgraphs_ = connectedSubgraphs;

  return connectedSubgraphs;
}
// Return the set of JBBCs within this subset that feed into the given
// node: the region reachable from the node's predecessors following
// join and predecessor edges (and successor edges if followSuccessors).
CANodeIdSet JBBSubsetAnalysis::getInputSubgraph(CANodeId node, NABoolean followSuccessors) const
{
CANodeIdSet inputSubgraph;
//long numSubgraphs = 1;
CANodeIdSet available(jbbcs_);
available -= node;
// seed the frontier with the node's predecessors inside this subset
CANodeIdSet frontNodes(node.getNodeAnalysis()->getJBBC()->getPredecessorJBBCs());
frontNodes.intersectSet(available);
// NOTE(review): if the node has no predecessors in this subset the
// frontier starts out empty while 'available' is not, and getFirst()
// is called on an empty set in the first iteration -- presumably that
// yields a benign id; confirm against CANodeIdSet semantics.
CANodeId currNode;
// NOTE(review): currSubgraph is never used in this function (the
// result accumulates in inputSubgraph) -- apparent leftover from
// getConnectedSubgraphs.
CANodeIdSet currSubgraph;
while (available.entries() != 0)
{
currNode = frontNodes.getFirst();
inputSubgraph += currNode;
available -= currNode;
// neighbors via join edges, predecessor edges, and (optionally)
// successor edges
CANodeIdSet newNodes(currNode.getNodeAnalysis()->getJBBC()->getJoinedJBBCs());
if(followSuccessors)
newNodes += currNode.getNodeAnalysis()->getJBBC()->getSuccessorJBBCs();
newNodes += currNode.getNodeAnalysis()->getJBBC()->getPredecessorJBBCs();
// execlude nodes that are already tested and nodes that are outside
// this JBBSubset jbbcs
newNodes.intersectSet(available);
available -= newNodes;
// add the new nodes to the subgraph
inputSubgraph += newNodes;
// now add the newNodes to frontNodes (nodes to be tested) and
// remove currNode from frontNodes;
frontNodes += newNodes;
frontNodes -= currNode;
// frontier exhausted: the input subgraph is complete
if (frontNodes.entries() == 0)
break;
}
return inputSubgraph;
}
// Return the union of the subtree tables of every member JBBC.
CANodeIdSet JBBSubset::getSubtreeTables() const
{
  CANodeIdSet subtreeTables;
  for (CANodeId member = jbbcs_.init();
       jbbcs_.next(member);
       jbbcs_.advance(member))
  {
    subtreeTables += member.getNodeAnalysis()->getJBBC()->getSubtreeTables();
  }
  return subtreeTables;
}
// Return the union of the used columns of every node in this set.
// NOTE(review): unlike getUsedTableCols(), this dereferences each
// node's TableAnalysis unconditionally -- callers must guarantee every
// node is a table; confirm.
ValueIdSet CANodeIdSet::getUsedCols() const
{
  ValueIdSet cols;
  for (CANodeId node = init(); next(node); advance(node))
  {
    cols += node.getNodeAnalysis()->getTableAnalysis()->getUsedCols();
  }
  return cols;
}
// Return the used columns of this node's table; an empty set when the
// node has no NodeAnalysis or no TableAnalysis (i.e. is not a table).
ValueIdSet CANodeId::getUsedTableCols()
{
  ValueIdSet cols;
  if (getNodeAnalysis() &&
      getNodeAnalysis()->getTableAnalysis())
  {
    cols = getNodeAnalysis()->getTableAnalysis()->getUsedCols();
  }
  return cols;
}
// Return the union of the used table columns of every node in this set;
// non-table nodes contribute nothing (per CANodeId::getUsedTableCols).
ValueIdSet CANodeIdSet::getUsedTableCols()
{
  ValueIdSet cols;
  for (CANodeId node = init(); next(node); advance(node))
  {
    cols += node.getUsedTableCols();
  }
  return cols;
}
// ------------------------------------------------------------------------
// Return the minimum row count over all JBBCs in this subset, after
// applying each JBBC's local predicates (cardinalities come from ASM).
// ------------------------------------------------------------------------
CostScalar JBBSubset::getMinimumJBBCRowCount() const
{
  // handle to the Applied Statistics Manager
  AppliedStatMan * appStatMan = QueryAnalysis::ASM();

  CostScalar minSoFar = COSTSCALAR_MAX;
  for (CANodeId jbbc = jbbcs_.init();
       jbbcs_.next(jbbc);
       jbbcs_.advance(jbbc))
  {
    CostScalar rowCount =
      appStatMan->getStatsForCANodeId(jbbc)->getResultCardinality();
    if (rowCount < minSoFar)
      minSoFar = rowCount;
  }
  return minSoFar;
} // JBBSubset::getMinimumJBBCRowCount
// Return TRUE if joining the given JBBC on top of this subset is
// guaranteed not to increase the row count:
//  - trivially TRUE for an empty subset
//  - TRUE when the JBBC is a successor connected via a (anti) semi join
//    (those joins only filter)
//  - otherwise TRUE iff the JBBC is unique on the columns used by the
//    predicates connecting it to this subset (at most one match per row)
NABoolean JBBSubset::isGuaranteedNonExpandingJoin(JBBC jbbc)
{
CANodeIdSet thisSetJBBCs = getJBBCs();
if(!thisSetJBBCs.entries())
return TRUE;
// a semi / anti-semi join from a successor can only filter rows
Join * jbbcParentJoin = jbbc.getOriginalParentJoin();
CANodeIdSet successorJBBCs = getSuccessorJBBCs();
if(successorJBBCs.contains(jbbc.getId()) &&
jbbcParentJoin &&
(jbbcParentJoin->isSemiJoin() ||
jbbcParentJoin->isAntiSemiJoin()))
return TRUE;
// members of this subset that are connected to the JBBC and could
// therefore be joined before it
CANodeIdSet jbbcsThatCanComeBefore =
jbbc.getJoinedJBBCs();
jbbcsThatCanComeBefore +=
jbbc.getPredecessorJBBCs();
jbbcsThatCanComeBefore.intersectSet(thisSetJBBCs);
// not connected at all => a cross product, which can expand
if(!jbbcsThatCanComeBefore.entries())
return FALSE;
// gather the predicates connecting the JBBC to those members,
// including the dependency predicates it has with them
JBBSubset * jbbcSubset =
((CANodeIdSet)jbbc.getId()).jbbcsToJBBSubset();
JBBSubset * connectedJBBCs =
jbbcsThatCanComeBefore.jbbcsToJBBSubset();
ValueIdSet connectingPreds =
connectedJBBCs->joinPredsWithOther(*jbbcSubset);
ValueIdSet jbbcPredsWithPredecessors =
jbbc.getPredsWithPredecessors();
jbbcPredsWithPredecessors.intersectSet(connectedJBBCs->getPredsWithSuccessors());
connectingPreds += jbbcPredsWithPredecessors;
NodeAnalysis * jbbcNodeAnalysis = jbbc.getNodeAnalysis();
RelExpr * jbbcExpr = jbbcNodeAnalysis->getOriginalExpr();
// equality-connecting preds are only available when the JBBC is a
// base table (TableAnalysis present)
TableAnalysis * jbbcTable = jbbcNodeAnalysis->getTableAnalysis();
ValueIdSet equalityConnectingPreds;
if(jbbcTable)
equalityConnectingPreds = jbbcTable->getEqualityConnectingPreds();
GroupAttributes * jbbcGA = jbbcExpr->getGroupAttr();
// collect the columns referenced by the connecting predicates:
// VEG references for VEG predicates, equality-connected columns for
// non-VEG equality predicates
ValueIdSet ColVegRefSet;
for (ValueId x = connectingPreds.init();
connectingPreds.next(x);
connectingPreds.advance(x) )
{
ItemExpr * ie = x.getItemExpr();
OperatorType ot = ie->getOperator();
if(ot == ITM_VEG_PREDICATE)
{
VEGPredicate * vegPred = (VEGPredicate *) ie;
ColVegRefSet.insert(vegPred->getVEG()->getVEGReference()->getValueId());
}
// safe when jbbcTable is NULL: equalityConnectingPreds is empty then
if(equalityConnectingPreds.contains(x))
ColVegRefSet.insert(jbbcTable->getEqualityConnectedCol(x));
}
// unique on the connecting columns => at most one match per row
return jbbcGA->isUnique(ColVegRefSet);
}
// verify integrity of the JBBSubset. That is
// jbbcs_ contains only jbbcs from same JBB
// gb_ belongs to a group by of the same JBB
// Currently unimplemented: xxx() asserts unconditionally, so this must
// not be called until the checks above are written.
NABoolean JBBSubset::verify()
{
xxx(); // never called for now
return FALSE;
}
// Build a human-readable description of this subset: the member JBBC
// ids, plus the group-by id if one is present.
const NAString JBBSubset::getText() const
{
  NAString text("JBBSubset : {");
  for (CANodeId member = jbbcs_.init();
       jbbcs_.next(member);
       jbbcs_.advance(member))
  {
    text += " " + istring(member) + " ";
  }
  if (gb_ != NULL_CA_ID)
    text += "; " + istring(gb_);
  text += "}\n";
  return text;
}
// Debugger convenience wrapper: delegates to print() with its default
// arguments.
void JBBSubset::display() const
{
print();
}
// Print this JBBSubset's description (getText()) to the given file
// stream. prefix/suffix are accepted for signature consistency with
// other print() methods in this file but are currently unused.
void JBBSubset::print (FILE *f,
                       const char * prefix,
                       const char * suffix) const
{
  // BUGFIX: pass the text as an argument rather than as the format
  // string; a '%' inside the text would otherwise make fprintf read
  // nonexistent varargs (classic format-string bug).
  fprintf (f, "%s", getText().data());
}
// -----------------------------------------------------------------------
// Methods for class JBBSubsetAnalysis
// -----------------------------------------------------------------------
// construct a JBBSubsetAnalysis for the given subset, allocated with
// respect to outHeap. Copies the subset's membership, initializes every
// cached / lazily-computed field to its "not yet computed" sentinel,
// then calls init() to compute the derived connectivity fields.
JBBSubsetAnalysis::JBBSubsetAnalysis(const JBBSubset & subset,
CollHeap *outHeap)
:leftDeepJoinSequence_(CmpCommon::statementHeap(),
subset.getJBBCs().entries()),
matchingMVs_(CmpCommon::statementHeap())
{
heap_ = outHeap;
// membership copied from the subset
jbb_ = subset.getJBB();
jbbcs_ = subset.getJBBCs();
subsetMJ_ = NULL;
gb_ = subset.getGB();
allMembers_ = subset.getJBBCsAndGB();
// recomputed in init() by inspecting each member's parent join
allJoinsInnerNonSemi_ = TRUE;
connectedSubgraphs_ = NULL; // on demand only
mjRulesWA_ = NULL;
mjStarJoinIRuleWA_ = NULL; // this will be created and passed in by
// MJStarJoinIRule
legality_ = NOT_KNOWN;
// star-join analysis fields: "not yet computed" sentinels
computedFactAndLargestTable_ = FALSE;
factTable_ = NULL_CA_ID;
largestTable_ = NULL_CA_ID;
computedLargestIndependentNode_ = FALSE;
largestIndependentNode_ = NULL_CA_ID;
starJoinTypeIFeasible_ = FALSE;
analyzedInitialPlan_ = FALSE;
// center-table (MJRulesWA) fields: -1 / 0 == not yet computed
centerTableComputed_ = FALSE;
centerTable_ = NULL_CA_ID;
centerTableRowsScanned_ = -1;
centerTableDataScanned_ = -1;
centerTableDataPerPartition_ = -1;
centerTablePartitions_ = -1;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
listOfEdges_ = NULL;
optimalFTLocation_ = -1;
lowestFactNJCost_ = csZero;
costForMaxKeyFactNJ_ = csZero;
synthLogPropPath_ = NULL;
// Compute other fields
init();
}
// Which JBBCs can be added to this subset while preserving its
// self-dependency? For now every JBBC of the JBB that is not already a
// member is considered legal; once dependency joins are included this
// must consult the dependency graph instead.
CANodeIdSet JBBSubsetAnalysis::legalJBBCAdditions() const
{
  CANodeIdSet additions = jbb_->getMainJBBSubset().getJBBCs();
  additions -= jbbcs_;
  return additions;
}
// initialize JBBSubsetAnalysis computable fields
// From the member JBBCs this computes:
//  - joinedJBBCs_ / successorJBBCs_ / predecessorJBBCs_: JBBCs outside
//    the subset connected to it via join / dependency edges
//  - joinPreds_ / predsWithSuccessors_ / predsWithPredecessors_: the
//    predicates connecting the subset to those outside JBBCs
//  - allJoinsInnerNonSemi_, synthLogPropPath_ (single-JBBC case), and
//    the local join predicates (via computeLocalJoinPreds()).
void JBBSubsetAnalysis::init()
{
joinedJBBCs_.clear();
successorJBBCs_.clear();
predecessorJBBCs_.clear();
// accumulated predicates of all member JBBCs; used below to filter the
// outside JBBCs' predicates down to those that connect to this subset
ValueIdSet tmpJoinPreds;
ValueIdSet tmpSuccessorPreds;
ValueIdSet tmpPredecessorPreds;
CANodeId i;
for (i = jbbcs_.init(); jbbcs_.next(i); jbbcs_.advance(i) )
{
JBBC* jbbc = i.getNodeAnalysis()->getJBBC();
joinedJBBCs_ += jbbc->getJoinedJBBCs();
successorJBBCs_ += jbbc->getSuccessorJBBCs();
predecessorJBBCs_ += jbbc->getPredecessorJBBCs();
tmpJoinPreds += jbbc->getJoinPreds();
tmpSuccessorPreds += jbbc->getPredsWithSuccessors();
tmpPredecessorPreds += jbbc->getPredsWithPredecessors();
// any member whose parent join is not an inner non-semi non-TSJ join
// makes the whole subset "not all inner"
Join * parentJoin = jbbc->getOriginalParentJoin();
if(parentJoin && !(parentJoin->isInnerNonSemiNonTSJJoin()))
allJoinsInnerNonSemi_ = FALSE;
}
// take subset members out
joinedJBBCs_ -= jbbcs_;
successorJBBCs_ -= jbbcs_;
predecessorJBBCs_ -= jbbcs_;
joinPreds_.clear();
predsWithSuccessors_.clear();
predsWithPredecessors_.clear();
// now loop over joinedJBBCs_ etc to compute joinPreds_ etc
for (i = joinedJBBCs_.init();
joinedJBBCs_.next(i);
joinedJBBCs_.advance(i) )
{
JBBC* jbbc = i.getNodeAnalysis()->getJBBC();
ValueIdSet currentPreds = jbbc->getJoinPreds();
// include joinPreds from current JBBC that are
// joinPreds for some JBBC in this JBBSubset
joinPreds_ += currentPreds.intersectSet(tmpJoinPreds);
}
for (i = successorJBBCs_.init();
successorJBBCs_.next(i);
successorJBBCs_.advance(i) )
{
JBBC* jbbc = i.getNodeAnalysis()->getJBBC();
ValueIdSet currentPreds = jbbc->getPredsWithPredecessors();
// include myDependentPreds from current JBBC that are
// dependentPreds for some JBBC in this JBBSubset
predsWithSuccessors_ += currentPreds.intersectSet(tmpSuccessorPreds);
}
for (i = predecessorJBBCs_.init();
predecessorJBBCs_.next(i);
predecessorJBBCs_.advance(i) )
{
JBBC* jbbc = i.getNodeAnalysis()->getJBBC();
ValueIdSet currentPreds = jbbc->getPredsWithSuccessors();
// include dependentPreds from current JBBC that are
// myDependentPreds for some JBBC in this JBBSubset
predsWithPredecessors_ += currentPreds.intersectSet(tmpPredecessorPreds);
}
// a single-JBBC subset has a trivial synth-log-prop path
if(jbbcs_.entries() == 1)
{
synthLogPropPath_ =
new (CmpCommon::statementHeap())
CASortedList(CmpCommon::statementHeap(), jbbcs_.entries());
(*synthLogPropPath_).insert(jbbcs_.getFirst());
}
// xxx consider making this on demand only
computeLocalJoinPreds();
}
// compute the join preds between these subset members
// Fills localInnerNonSemiJoinPreds_, localDependentJoinPreds_, and
// their union localJoinPreds_. A predicate qualifies as local when it
// is referenced by at least two member JBBCs AND is covered by the
// subset's combined outputs (coversExpr test).
void JBBSubsetAnalysis::computeLocalJoinPreds()
{
ValueIdSet accumulatedJoinPreds;
ValueIdSet currentJoinPreds;
ValueIdSet newLocalJoinPreds;
ValueIdSet multipleReferenceJoinPreds;
ValueIdSet allSuccessorPreds;
ValueIdSet allPredecessorPreds;
ValueIdSet allConstPredsWithPredecessors;
ValueIdSet allLeftJoinFilterPreds;
// First we try to compute multipleReferenceJoinPreds which are join preds
// that are referencing more than 1 JBBC is this subset.
CANodeId i;
for (i = jbbcs_.init(); jbbcs_.next(i); jbbcs_.advance(i) )
{
JBBC* jbbc = i.getNodeAnalysis()->getJBBC();
currentJoinPreds = jbbc->getJoinPreds();
// preds seen for an earlier member AND for this member reference
// at least two members
newLocalJoinPreds = currentJoinPreds;
multipleReferenceJoinPreds += newLocalJoinPreds.intersectSet(accumulatedJoinPreds);
accumulatedJoinPreds += currentJoinPreds;
// accumulate the dependency-flavored predicates for the filtering
// steps below
allSuccessorPreds += jbbc->getPredsWithSuccessors();
allPredecessorPreds += jbbc->getPredsWithPredecessors();
allConstPredsWithPredecessors += jbbc->getConstPredsWithPredecessors();
allLeftJoinFilterPreds += jbbc->getLeftJoinFilterPreds();
}
// update multipleReferencePreds to exclude nonInnerJoinPreds
// this is done since for nonInnerJoinPreds the VEG in the VEGPred
// is only valid on the right side of the join
multipleReferenceJoinPreds -= allSuccessorPreds;
multipleReferenceJoinPreds -= allPredecessorPreds;
multipleReferenceJoinPreds -= allConstPredsWithPredecessors;
multipleReferenceJoinPreds -= allLeftJoinFilterPreds;
// It is now guaranteed that each pred in multipleReferenceJoinPreds references
// at least two JBBCs. This is necessary but not suffecient condition since it
// is possible that the pred needs other values not supplied by any JBBC in
// this JBBSubset. We filter those using the coverExpr(pred) test.
localInnerNonSemiJoinPreds_.clear();
for (ValueId pred = multipleReferenceJoinPreds.init();
multipleReferenceJoinPreds.next(pred);
multipleReferenceJoinPreds.advance(pred))
{
// if pred is a VEG then add it to the cache
if ((pred.getItemExpr()->getOperatorType() == ITM_VEG_PREDICATE)||
coversExpr(pred))
{
knownCovered_+=pred;
localInnerNonSemiJoinPreds_ += pred;
}
}
// localInnerNonSemiJoinPreds_ is now computed
// consider predicates with dependencies
localDependentJoinPreds_.clear();
// dependency preds local to this subset: those seen from both the
// successor and predecessor side, plus const-preds-with-predecessors
// and left-join filter preds
ValueIdSet allDependentPreds;
allDependentPreds +=
allPredecessorPreds.intersectSet(allSuccessorPreds);
allDependentPreds +=
allConstPredsWithPredecessors;
allDependentPreds +=
allLeftJoinFilterPreds;
for (ValueId pred = allDependentPreds.init();
allDependentPreds.next(pred);
allDependentPreds.advance(pred))
{
// if pred is a VEG then add it to the cache
if (//(pred.getItemExpr()->getOperatorType() == ITM_VEG_PREDICATE)||
coversExpr(pred))
{
knownCovered_+=pred;
localDependentJoinPreds_ += pred;
}
}
// localDependentJoinPreds_ is now computed
localJoinPreds_.clear();
localJoinPreds_ += localInnerNonSemiJoinPreds_;
localJoinPreds_ += localDependentJoinPreds_;
// localJoinPreds_ is now computed
}
// Return TRUE iff the given expression can be evaluated using only the
// combined outputs of this subset's JBBCs plus the JBB's characteristic
// inputs. Results are memoized in knownCovered_ / knownNotCovered_
// (written through const_cast since the method is logically const).
NABoolean JBBSubsetAnalysis::coversExpr(ValueId vid) const
{
// first check the known covered and not covered caches
if(knownCovered_.contains(vid))
return TRUE;
if(knownNotCovered_.contains(vid))
return FALSE;
// have to do this since we don't want to change the signature to remove
// the const. The two data members that can be potentially modified are
// there to cache the result, since a given vid should always return the
// same result
ValueIdSet * knownCovered = const_cast<ValueIdSet*> (&knownCovered_);
ValueIdSet * knownNotCovered = const_cast<ValueIdSet*> (&knownNotCovered_);
// if this Expr is already one of my local join predicates then
// it is covered already. xxx add this to dependent local preds
// in the future.
// Note: Every localJoinPred is covered by the JBBSubset but *not*
// every covered pred is part of the localJoinPred. For example
// a covered subexpression which is evaluated at the JBBC level.
if (getLocalJoinPreds().contains(vid))
{
(*knownCovered)+=vid;
return TRUE;
}
// remember the VEG reference if vid is a VEG predicate; used below to
// exclude that reference from a JBBC's outputs when the predicate has
// predecessor dependencies on that JBBC
NABoolean isAVEGPred = FALSE;
ValueId vegRefId;
if(vid.getItemExpr()->getOperatorType() == ITM_VEG_PREDICATE)
{
isAVEGPred = TRUE;
vegRefId = ((VEGPredicate*)(vid.getItemExpr()))->getVEG()->getVEGReference()->getValueId();
}
// Otherwise we do the actual check for covering. We uses the
// isCovered() method on the ItemExpr for that. Use the combined
// JBBCs outputs for output and the JBB normInputs for inputs.
ValueIdSet maximumOutput;
for (CANodeId x= jbbcs_.init(); jbbcs_.next(x); jbbcs_.advance(x) )
{
JBBC * jbbc = x.getNodeAnalysis()->getJBBC();
Join * parentJoin = jbbc->getOriginalParentJoin();
// only JBBCs joined by an inner non-semi join (or no parent join), or
// whose predecessor preds include vid, contribute their outputs
if(!parentJoin || (parentJoin->isInnerNonSemiJoin() ||
jbbc->getPredsWithPredecessors().contains(vid)))
{
ValueIdSet childOutputs =
x.getNodeAnalysis()->getOriginalExpr()->getGroupAttr()->getCharacteristicOutputs();
if(jbbc->getPredsWithPredecessors().contains(vid) && isAVEGPred)
{
if(childOutputs.contains(vegRefId))
childOutputs -= vegRefId;
}
maximumOutput += childOutputs;
}
// have to add the null instantiated values if the JBBC is
// connected via a left outer join. If a JBBC is connected
// via a left outer join then it has nullInstantiatedOutput
// otherwise nullInstantiatedOutput is empty. This is because
// the nullInstantiated output is not part of the JBBC's group
// characteristic output, rather it is the output of the left
// join.
ValueIdSet nullInstantiatedOutputs(x.getNodeAnalysis()->
getJBBC()->
nullInstantiatedOutput());
maximumOutput += nullInstantiatedOutputs;
}
// trivial case: the expression itself is one of the outputs
if(maximumOutput.contains(vid))
{
(*knownCovered)+=vid;
return TRUE;
}
// full coverage test via ItemExpr::isCovered() against a dummy group
// carrying the combined outputs and the JBB's inputs
ValueIdSet inputs = jbb_->getNormInputs();
ValueIdSet dummyNewExternalInputs;
ValueIdSet dummyReferencedInputs;
ValueIdSet dummyCoveredSubExpr;
ValueIdSet dummyUnCoveredExpr;
GroupAttributes* dummyGA =
new (CmpCommon::statementHeap()) GroupAttributes();
dummyGA->setCharacteristicOutputs(maximumOutput);
dummyGA->setCharacteristicInputs(inputs);
if (vid.getItemExpr()->isCovered
(dummyNewExternalInputs,
*dummyGA,
dummyReferencedInputs,
dummyCoveredSubExpr,
dummyUnCoveredExpr))
{
(*knownCovered)+=vid;
return TRUE;
}
// not covered; remember that too
(*knownNotCovered)+=vid;
return FALSE;
}
// Cache the MultiJoin representing exactly this subset. The MultiJoin
// is only stored when its JBBC set matches this subset's membership.
void JBBSubsetAnalysis::setSubsetMJ(MultiJoin * subsetMJ)
{
// NOTE(review): for an empty subset this clears the cache but then
// still falls through to the assert and comparison below -- looks
// like a missing early return; confirm intended behavior.
if(!jbbcs_.entries())
subsetMJ_ = NULL;
CMPASSERT(subsetMJ);
CANodeIdSet mjJBBCs = subsetMJ->getJBBSubset().getJBBCs();
if(jbbcs_ == mjJBBCs)
subsetMJ_ = subsetMJ;
}
// Lazily construct (on first call) and return the MJRulesWA work area.
// Historical: was used by the old LargeScopeRules topMatch method of
// MJStarJoinIRule; that code path is OFF by default and no longer used.
MJRulesWA * JBBSubsetAnalysis::getMJRulesWA()
{
  if (mjRulesWA_)
    return mjRulesWA_;
  mjRulesWA_ = new (CmpCommon::statementHeap()) MJRulesWA(this);
  return mjRulesWA_;
}
CANodeId MJRulesWA::computeCenterTable()
{
if(centerTableComputed_)
{
return centerTable_;
}
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = analysis_->getJBBCs();
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
//n is the number of jbbcs (i.e. tables) in this multijoin
UInt32 n = childSet.entries();
CANodeId center = NULL_CA_ID;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "There are "<<n<<" jbbcs"<<endl;
}
#endif //_DEBUG
// require atleast 3 tables
if (n < 3)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
// variable used in iteration
CANodeId currentTable;
// get a set of single row tables
CANodeIdSet singleRowTables;
// go over all the tables in the query
// and create a set of tables that are
// returning just one row
for(currentTable = childSet.init();
childSet.next(currentTable);
childSet.advance(currentTable))
{
// get number of tables in this JBBC
Int32 numSubtreeTables = currentTable.getNodeAnalysis()
->getJBBC()
->getSubtreeTables().entries();
CostScalar jbbcCardinality = appStatMan->
getStatsForCANodeId(currentTable)->
getResultCardinality();
//currentTable.getNodeAnalysis()->getCardinality();
if((jbbcCardinality == 1) &&
(numSubtreeTables < 2))
singleRowTables.insert(currentTable);
}
n = n - singleRowTables.entries();
if( n < 3)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "SingleRow JBBCs are "<<singleRowTables.getText()<<endl;
}
#endif //_DEBUG
// minimum connectivity for center
UInt32 minimumCenterConnectivity = n-1;
// maximum connectivity for dimensions
UInt32 maximumDimensionConnectivity = 1;
// connectivity of the center / fact table
UInt32 centerConnectivity = 0;
// gets the maximum number of tables connected
// connected to a dimension table
UInt32 maxDimConnectivity = 0;
// figure out the parameters for matching the connectivity pattern
switch (n)
{
case 3:
case 4:
// for n == 4 or n==3
minimumCenterConnectivity = n-1;
maximumDimensionConnectivity = 1;
break;
case 5:
// for n == 5
minimumCenterConnectivity = n-2;
maximumDimensionConnectivity = 2;
break;
default:
// for n >= 6
minimumCenterConnectivity = n-3;
// Changed COMP_INT_0 to COMP_INT_20 to avoid conflict with Nice context changes, with this
// Nice Context feature is OFF by default. (06/07/06)
maximumDimensionConnectivity = (ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_20);
break;
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Pattern match parameters are: "<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Min center connectivity: "<<minimumCenterConnectivity<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Max dimension connectivity: "<<maximumDimensionConnectivity<<endl;
CURRCONTEXT_OPTDEBUG->stream() << endl;
}
#endif //_DEBUG
// get the fact table name if a fact table has been forced
const char* factTableName = ActiveSchemaDB()->getDefaults().
getValue(COMP_STRING_1);
// variable that tells us that the
// table we want to be the fact table
// is found in the multijoin
Int32 centerTableForced = 0;
// The loop below tries to match the connectivity pattern
// iterate over all the jbbcs (i.e. tables)
// in this multijoin
// In each iteration
// Figure out:
// * if this table is the center/fact table
// * if this table is a dimension table
for(currentTable = childSet.init();
childSet.next(currentTable);
childSet.advance(currentTable))
{
// if factTableName is NONE (default value),
// then we don't want to force a particular
// table to be the fact table
if(stricmp(factTableName,"NONE") != 0)
{
//if this is not a table move on to the next node
if(!currentTable.getNodeAnalysis()->getTableAnalysis())
{
// compare to currentTable's name
// if comparison is successful we get a zero
// therefore reverse the result
centerTableForced = !(currentTable.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getCorrNameObj().\
getCorrNameAsString().\
compareTo(factTableName,
NAString::ignoreCase));
// we found a the table that we wanted
// force as the fact table
if (centerTableForced)
{
center = currentTable;
centerConnectivity = center.getNodeAnalysis()->
getJBBC()->
getJoinedJBBCs().
entries();
}
}
}
//all JBBCs connect to current table
CANodeIdSet connectedChildren = currentTable.getNodeAnalysis()->getJBBC()->getJoinedJBBCs();
// get number of tables connected to current jbbc
UInt32 numConnectedChildren = connectedChildren.entries();
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "JBBC: "<<currentTable.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "connected jbbcs: "<<connectedChildren.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << endl;
}
#endif //_DEBUG
// if current jbbc is:
// * connected to at least minimumCenterConnectivity jbbcs
// * has a connectivity greater than the connectivity
// of the current center/fact table
// then set this jbbc to be the center/fact table
if ((numConnectedChildren >= minimumCenterConnectivity) &&
(centerConnectivity < numConnectedChildren) &&
(currentTable.getNodeAnalysis()->getTableAnalysis()) &&
(!centerTableForced))
{
// see we are going have found a new center
// the previous center should be a dimension
// make sure it is not connected to more than
// the allowed number of connected tables for
// dimension tables.
// If the previous center is connected to a
// greater number of tables than allowed for
// a dimension table then fail pattern match
if (centerConnectivity > maximumDimensionConnectivity)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
center = currentTable;
centerConnectivity = numConnectedChildren;
}
else{
// this table is potentially a dimension table
// make sure it is not connected to more than
// the allowed number of tables
// Ignore tables that return 1 row when counting
// the connected tables
UInt32 numNonSingleRowConnectedChildren =
connectedChildren.subtractSet(singleRowTables).entries();
// if the number of connected children is greater than
// the maximum connectivity of any dimension table upto
// this point, then update the maximum connectivity
if (numNonSingleRowConnectedChildren > maxDimConnectivity)
maxDimConnectivity = numNonSingleRowConnectedChildren;
// if this table is connected to more than the maximum
// number of tables allowed for a dimension table
// then fail the pattern match
if ( numNonSingleRowConnectedChildren > maximumDimensionConnectivity)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
}
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
if(center != NULL_CA_ID)
{
CURRCONTEXT_OPTDEBUG->stream() << "Center Table is: "<<center.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center connectivity is: "<<centerConnectivity<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Max dimension connectivity is: "<<maxDimConnectivity<<endl;
CURRCONTEXT_OPTDEBUG->stream() << endl;
}
else{
CURRCONTEXT_OPTDEBUG->stream() << "No center table found "<<center.getText()<<endl;
}
}
#endif //_DEBUG
// we did not find a center table
if (center == NULL_CA_ID)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
// get rows to be scanned for center table
CostScalar centerTableRowsToBeScanned =
appStatMan->getStatsForLocalPredsOnCKPOfJBBC(center)->
getResultCardinality();
// get record size for center table
CostScalar centerTableRecordSize =
center.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getNATable()->
getRecordLength();
// compute amount of data to be scanned from center table
// amount is in KB
CostScalar centerTableDataToBeScanned =
(centerTableRowsToBeScanned * centerTableRecordSize) / 1024;
// get number of partitions for the center table
CostScalar numCenterTablePartitions =
center.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getClusteringIndex()->
getNAFileSet()->
getCountOfPartitions();
CostScalar sizePerPartition = centerTableDataToBeScanned / numCenterTablePartitions;
centerTableComputed_ = TRUE;
centerTable_ = center;
centerTableConnectivity_ = centerConnectivity;
maxDimensionConnectivity_ = maxDimConnectivity;
centerTableRowsScanned_ = centerTableRowsToBeScanned;
centerTableDataScanned_ = centerTableDataToBeScanned;
centerTablePartitions_ = numCenterTablePartitions;
centerTableDataPerPartition_ = sizePerPartition;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Center Table Rows Scanned: "\
<<centerTableRowsScanned_.getValue()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center Table Record Size: "\
<<centerTableRecordSize.getValue()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center Table Data Scanned: "\
<<centerTableDataScanned_.getValue()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center Table Num partitions: "\
<<centerTablePartitions_.getValue()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center Table size per partition: "\
<<centerTableDataPerPartition_.getValue()<<endl;
}
#endif //_DEBUG
return centerTable_;
}
// Return the JBBCs of this JBBSubset ordered by cardinality after
// application of local predicates. The JBB-wide sorted list is
// filtered down to just the members of this subset, preserving the
// JBB-wide ordering. Result is allocated on the statement heap.
CASortedList * JBBSubsetAnalysis::getNodesSortedByLocalPredsCard()
{
  // members of this JBBSubset
  CANodeIdSet subsetNodes = getJBBCs();

  // all nodes of the JBB, already sorted by local-preds cardinality
  const CASortedList * allNodesSorted =
    jbb_->getJBBWA()->getNodesSortedByLocalPredsCard();

  CASortedList * subsetSorted = new (CmpCommon::statementHeap())
    CASortedList(CmpCommon::statementHeap(), subsetNodes.entries());

  // keep only the nodes that belong to this subset,
  // in the order given by the JBB-wide list
  UInt32 numNodes = allNodesSorted->entries();
  for (UInt32 idx = 0; idx < numNodes; idx++)
  {
    CANodeId candidate = (*allNodesSorted)[idx];
    if (subsetNodes.contains(candidate))
      subsetSorted->insert(candidate);
  }

  return subsetSorted;
}
// Return the JBBCs of this JBBSubset ordered by cardinality after
// application of local predicates on the clustering-key prefix.
// Same filtering scheme as getNodesSortedByLocalPredsCard(): walk the
// JBB-wide sorted list and keep only members of this subset.
CASortedList * JBBSubsetAnalysis::getNodesSortedByLocalKeyPrefixPredsCard()
{
  // members of this JBBSubset
  CANodeIdSet subsetNodes = getJBBCs();

  // all nodes of the JBB, sorted by key-prefix local-preds cardinality
  const CASortedList * allNodesSorted =
    jbb_->getJBBWA()->getNodesSortedByLocalKeyPrefixPredsCard();

  CASortedList * subsetSorted = new (CmpCommon::statementHeap())
    CASortedList(CmpCommon::statementHeap(), subsetNodes.entries());

  // retain the JBB-wide ordering, restricted to this subset
  UInt32 numNodes = allNodesSorted->entries();
  for (UInt32 idx = 0; idx < numNodes; idx++)
  {
    CANodeId candidate = (*allNodesSorted)[idx];
    if (subsetNodes.contains(candidate))
      subsetSorted->insert(candidate);
  }

  return subsetSorted;
}
// Return the member of this JBBSubset that produces the most output
// data. The JBB-wide list sorted by output data is scanned in order;
// the first node belonging to this subset is the largest.
// Returns NULL_CA_ID when no member appears in the sorted list.
CANodeId JBBSubsetAnalysis::getLargestNode()
{
  // members of this JBBSubset
  CANodeIdSet subsetNodes = getJBBCs();

  const CASortedList * allNodesSorted =
    jbb_->getJBBWA()->getNodesSortedByOutputData();

  // first subset member in the sorted list is the largest
  UInt32 numNodes = allNodesSorted->entries();
  for (UInt32 idx = 0; idx < numNodes; idx++)
  {
    CANodeId candidate = (*allNodesSorted)[idx];
    if (subsetNodes.contains(candidate))
      return candidate;
  }

  return NULL_CA_ID;
}
// Return the largest (by output data) member of this JBBSubset that is
// "independent", i.e. either has no original parent join or whose
// parent join is an inner non-semi non-TSJ join. The answer is cached
// in largestIndependentNode_ after the first computation.
// Returns NULL_CA_ID when no such node exists.
CANodeId JBBSubsetAnalysis::getLargestIndependentNode()
{
  // serve the cached answer if we already computed it
  if (computedLargestIndependentNode_)
    return largestIndependentNode_;

  // members of this JBBSubset
  CANodeIdSet subsetNodes = getJBBCs();

  const CASortedList * allNodesSorted =
    jbb_->getJBBWA()->getNodesSortedByOutputData();

  for (UInt32 idx = 0; idx < allNodesSorted->entries(); idx++)
  {
    CANodeId candidate = (*allNodesSorted)[idx];
    if (!subsetNodes.contains(candidate))
      continue;

    JBBC * candidateJBBC = candidate.getNodeAnalysis()->getJBBC();
    Join * parentJoin = candidateJBBC->getOriginalParentJoin();

    // independent: no parent join, or an inner non-semi non-TSJ parent
    if ((!parentJoin) ||
        (parentJoin->isInnerNonSemiNonTSJJoin()))
    {
      largestIndependentNode_ = candidate;
      computedLargestIndependentNode_ = TRUE;
      return candidate;
    }
    /*
    if(!(nodeJBBC->getPredecessorJBBCs().entries()))
    return nodeId;
    */
  }

  // no independent node found; cache the negative answer too
  largestIndependentNode_ = NULL_CA_ID;
  computedLargestIndependentNode_ = TRUE;
  return NULL_CA_ID;
}
// Entry point taking an explicit MultiJoin: verify that the given
// multi-join is in fact the one this analysis object describes, then
// delegate to the parameterless analyzeInitialPlan().
void JBBSubsetAnalysis::analyzeInitialPlan(MultiJoin * mjoin)
{
  CMPASSERT(mjoin);
  // the multi-join's subset analysis must be 'this'
  CMPASSERT(this == mjoin->getJBBSubset().getJBBSubsetAnalysis());
  analyzeInitialPlan();
}
// Analyze the initial join plan for this JBBSubset.
// Steps:
//   1. Find the fact table (and largest table) for this subset.
//   2. If no fact table was found, try promoting the center table of a
//      star-like connectivity pattern to fact table, subject to
//      size-per-partition (COMP_INT_16, in KB) and partition-count
//      (COMP_INT_17) thresholds.
//   3. Detect any full outer join / TSJ JBBC, which disqualifies the
//      star-join type-I arrangement.
//   4. If a star pattern with good key-prefix access on the fact table
//      is matched, arrange tables for star join type I; otherwise fall
//      back to the type-II arrangement.
//   5. Recurse into any composite children (MultiJoins) of the
//      resulting left-deep join sequence.
// Idempotent: guarded by analyzedInitialPlan_.
// Change vs. original: removed the unused local 'centerTableSize'
// (getCenterTableSize() is only read, never consumed below).
void JBBSubsetAnalysis::analyzeInitialPlan()
{
if(analyzedInitialPlan_)
return;
CANodeId factTable = NULL_CA_ID;
CANodeId largestTable = NULL_CA_ID;
CostScalar factTableCKPrefixCardinality;
factTable = findFactAndLargestTable(factTableCKPrefixCardinality,
largestTable);
// if we did not find a fact table then look for the center table
if (factTable == NULL_CA_ID)
{
CANodeId centerTable = computeCenterTable();
CostScalar centerTableSizePerPartition =
getCenterTableSizePerPartition();//centerTableDataPerPartition_;
CostScalar centerTablePartitions = getCenterTablePartitions();//centerTablePartitions_;
if(centerTable != NULL_CA_ID)
{
JBBC * centerTableJBBC = centerTable.getNodeAnalysis()->getJBBC();
// center table can only be promoted to fact table if it does not
// depend on any other JBBC
if(!(centerTableJBBC->getPredecessorJBBCs().entries()))
{
if(centerTable == largestTable)
factTable = centerTable;
else
{
// promote if per-partition data exceeds COMP_INT_16 KB or the
// partition count exceeds COMP_INT_17
if((centerTableSizePerPartition >
((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_16)*1024))||
(centerTablePartitions >
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_17)))
{
factTable = centerTable;
}
}
}
}
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "FactTable: " << factTable.getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << endl;
}
#endif
// -----------------------------------------------------
// Begin: Find if there is a full outer join or TSJ node
// -----------------------------------------------------
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = getJBBCs();
NABoolean hasFullOuterJoinOrTSJ = FALSE;
CANodeId currentNode = NULL_CA_ID;
for(currentNode=childSet.init();
childSet.next(currentNode);
childSet.advance(currentNode))
{
JBBC * currentJBBC = currentNode.getNodeAnalysis()->getJBBC();
if (currentJBBC->isFullOuterJoinOrTSJJBBC() &&
!(currentJBBC->getOriginalParentJoin()))
{
hasFullOuterJoinOrTSJ = TRUE;
}
}
// ---------------------------------------------
// End: Find if there is a full outer join node
// ---------------------------------------------
// check to see if this subset matches a star pattern
// this means that the fact table has good key prefix
// access via some dimension tables
// If there is a good key prefix coverage this method
// will find the set of dimension tables that should
// be used to probe the fact table.
if ((!hasFullOuterJoinOrTSJ) &&
(factTable != NULL_CA_ID ) &&
isAStarPattern(factTable, factTableCKPrefixCardinality))
{
// for debugging only
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "JBBSubsetAnalysis StarJoinTypeI feasible" <<endl;
}
#endif //_DEBUG
factTable_ = factTable;
setStarJoinTypeIFeasible();
arrangeTablesAfterFactForStarJoinTypeI();
}
else{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "JBBSubsetAnalysis StarJoinTypeI not feasible" <<endl;
}
#endif //_DEBUG
arrangeTablesAfterFactForStarJoinTypeII();
}
// recursively call analyzeInitialPlan on any children
// that are JBBSubsets i.e. MultiJoins
UInt32 numJoinChildren = leftDeepJoinSequence_.entries();
for (UInt32 i = 0; i < numJoinChildren; i++)
{
CANodeIdSet joinChild = leftDeepJoinSequence_[i];
if (joinChild.entries() > 1)
{
JBBSubset * childSet = joinChild.jbbcsToJBBSubset();
childSet->
getJBBSubsetAnalysis()->
analyzeInitialPlan();
}
}
analyzedInitialPlan_ = TRUE;
}
// compute the resources required for this subset
//
// Walks the left-deep join sequence computed by analyzeInitialPlan()
// and, for each join step, accumulates estimated memory (inner-child
// rows * record size) and CPU (A*rows + B*rows*recsize, where B comes
// from CQD WORK_UNIT_ESP_DATA_COPY_COST) into reqResources, along with
// the maximum cardinality seen. Recurses into composite (MultiJoin)
// children so their resources are accumulated too.
//
// Parameters:
//   mjoin        - the MultiJoin corresponding to this JBBSubset;
//                  used to materialize sub-MultiJoins for row-length
//                  lookups and recursion
//   reqResources - accumulator, updated in place
//   inLP         - input estimated logical properties; passed through
//                  to recursive calls (not otherwise read here)
//
// Precondition: analyzeInitialPlan() must already have run (asserted).
// NOTE(review): if leftDeepJoinSequence_ were empty, numJoinChildren-1
// would wrap (UInt32) — presumably analyzeInitialPlan() guarantees at
// least one entry; confirm before relying on this with empty subsets.
void JBBSubsetAnalysis::computeRequiredResources(
MultiJoin * mjoin,
RequiredResources & reqResources,
EstLogPropSharedPtr & inLP)
{
// makesure initial plan has been analyzed
CMPASSERT(analyzedInitialPlan_);
// A: per-row CPU weight; B: per-byte data-copy weight from CQD
CostScalar A = csOne;
CostScalar B (getDefaultAsDouble(WORK_UNIT_ESP_DATA_COPY_COST));
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
UInt32 numJoinChildren = leftDeepJoinSequence_.entries();
// joinOuterChildren starts as all JBBCs; each iteration removes the
// current inner child, leaving the outer (left) side of the join
CANodeIdSet joinOuterChildren = getJBBCs();
CostScalar joinMaxCard = csZero;
CostScalar maxCard = csZero;
// iterate over all children except the last child
UInt32 i = 0;
for (i = 0; i < (numJoinChildren-1); i++)
{
// get join's max cardinality
// (stats for the as-yet-unreduced outer set, i.e. the join result
// at this level of the left-deep tree)
joinMaxCard = appStatMan->
getStatsForCANodeIdSet(joinOuterChildren)-> getMaxCardEst();
CostScalar memoryResourcesRequired = csZero;
CostScalar cpuResourcesRequired = csZero;
CANodeIdSet joinChild = leftDeepJoinSequence_[i];
// get all the nodes on the left of the join
joinOuterChildren -= joinChild;
CostScalar innerChildCardinality = csZero;
CostScalar innerChildRecordSize = csZero;
CostScalar innerChildMaxCardinality = csZero;
CostScalar outerChildCardinality = csZero;
CostScalar outerChildRecordSize = csZero;
CostScalar outerChildMaxCardinality = csZero;
CostScalar joinChildDataAccessCost = csZero;
// if child is a MultiJoin
if (joinChild.entries() > 1)
{
// composite inner child: recurse first so its own joins are
// accumulated, then read its aggregate stats
JBBSubset * innerChildSet = joinChild.jbbcsToJBBSubset();
MultiJoin * innerChildMJ = mjoin->createSubsetMultiJoin(*innerChildSet);
JBBSubsetAnalysis * innerChildAnalysis = innerChildSet->getJBBSubsetAnalysis();
innerChildAnalysis->computeRequiredResources(innerChildMJ,reqResources, inLP);
innerChildCardinality = appStatMan->
getStatsForCANodeIdSet(joinChild)->
getResultCardinality();
innerChildMaxCardinality = appStatMan->
getStatsForCANodeIdSet(joinChild)->
getMaxCardEst();
innerChildRecordSize = innerChildMJ->getGroupAttr()->getCharacteristicOutputs().getRowLength();
}
// child is a single node
// (the fact table of a feasible star type-I plan is deliberately
// excluded from inner-child accounting)
else if (!joinChild.contains(factTable_) || !starJoinTypeIFeasible())
{
CANodeId innerChildNode = joinChild.getFirst();
innerChildCardinality = appStatMan->
getStatsForCANodeId(innerChildNode)->
getResultCardinality();
innerChildMaxCardinality = appStatMan->
getStatsForCANodeId(innerChildNode)->
getMaxCardEst();
RelExpr * innerChildExpr = innerChildNode.getNodeAnalysis()->getOriginalExpr();
innerChildRecordSize = innerChildExpr->getGroupAttr()->getCharacteristicOutputs().getRowLength();
}
// outer side stats: composite vs single-node, same pattern as inner
if (joinOuterChildren.entries() > 1)
{
JBBSubset * outerChildSet = joinOuterChildren.jbbcsToJBBSubset();
MultiJoin * outerChildMJ = mjoin->createSubsetMultiJoin(*outerChildSet);
JBBSubsetAnalysis * outerChildAnalysis = outerChildSet->getJBBSubsetAnalysis();
outerChildCardinality = appStatMan->
getStatsForCANodeIdSet(joinOuterChildren)->
getResultCardinality();
outerChildMaxCardinality = appStatMan->
getStatsForCANodeIdSet(joinOuterChildren)->
getMaxCardEst();
outerChildRecordSize = outerChildMJ->getGroupAttr()->getCharacteristicOutputs().getRowLength();
}
else{
CANodeId outerChildNode = joinOuterChildren.getFirst();
outerChildCardinality = appStatMan->
getStatsForCANodeId(outerChildNode)->
getResultCardinality();
outerChildMaxCardinality = appStatMan->
getStatsForCANodeId(outerChildNode)->
getMaxCardEst();
RelExpr * outerChildExpr = outerChildNode.getNodeAnalysis()->getOriginalExpr();
outerChildRecordSize = outerChildExpr->getGroupAttr()->getCharacteristicOutputs().getRowLength();
}
// track the running maximum cardinality across join result,
// inner child and outer child
if (maxCard < joinMaxCard)
maxCard = joinMaxCard;
if (maxCard < innerChildMaxCardinality)
maxCard = innerChildMaxCardinality;
if (maxCard < outerChildMaxCardinality)
maxCard = outerChildMaxCardinality;
// memory: inner child must be materialized (e.g. hash-join build side)
memoryResourcesRequired += (innerChildCardinality * innerChildRecordSize);
cpuResourcesRequired +=
(A * innerChildCardinality) +
(B * innerChildCardinality * innerChildRecordSize );
cpuResourcesRequired +=
(A * outerChildCardinality) +
(B * outerChildCardinality * outerChildRecordSize );
reqResources.accumulate(memoryResourcesRequired, cpuResourcesRequired, csZero, maxCard);
}
// the outermost (last) child is never an inner build side; just
// recurse into it if it is itself a MultiJoin
CANodeIdSet outerMostChild = leftDeepJoinSequence_[numJoinChildren-1];
if (outerMostChild.entries() > 1)
{
JBBSubset * childSet = outerMostChild.jbbcsToJBBSubset();
MultiJoin * childMJ = mjoin->createSubsetMultiJoin(*childSet);
JBBSubsetAnalysis * childAnalysis = childSet->getJBBSubsetAnalysis();
childAnalysis->computeRequiredResources(childMJ,reqResources, inLP);
}
}
// Find the fact table among the JBBCs of childSet.
//
// Returns the chosen fact table (or NULL_CA_ID) and sets:
//   factTableCKPrefixCardinality - rows scanned from the fact table
//                                  after local preds on the clustering
//                                  key prefix (only set when a fact
//                                  table is chosen)
//   biggestTable                 - the largest table found, regardless
//                                  of whether it qualifies as fact
//
// Selection logic:
//   1. If CQD COMP_STRING_1 names a table, that table is forced as the
//      fact table (if present, independent, and matched by name).
//   2. Otherwise find the largest and second-largest tables by "total
//      data volume" (key-prefix-scan bytes, with record size capped at
//      COMP_INT_4, plus a COMP_INT_45-weighted retrieved-data term).
//      The largest qualifies as fact if it is at least COMP_INT_2
//      times the second largest.
//   3. If DIMENSIONAL_QUERY_OPTIMIZATION or COMP_BOOL_13 is ON, the
//      largest table is used as fact even without the size margin.
CANodeId JBBSubsetAnalysis::findFactTable(
CANodeIdSet childSet,
CostScalar & factTableCKPrefixCardinality,
CANodeId & biggestTable)
{
// The fact table should be the table that has significantly more data read
// from it as compared to all the other tables in a multi-join. The data read
// is measured by the number of rows read by a scan of the table multiplied
// by the record size. The number of rows read by a scan is not the number
// of rows returned by the scan, rather all the rows read from the table based
// on clustering key prefix predicates.
// Hence anytime the word cardinality is used in this method, it refers to the
// cardinality of a table after application of local predicates on the prefix
// of the clustering key
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
CANodeId factTable = NULL_CA_ID;
const char* factTableName = ActiveSchemaDB()->getDefaults().
getValue(COMP_STRING_1);
// if the user is trying to force a certain table to be considered
// the fact table
// ("NONE" is the default, meaning no table is forced)
if(stricmp(factTableName,"NONE") != 0)
{
//try to match the factTable Name to the table names of the JBBCs
CANodeId currentTable = NULL_CA_ID;
for(currentTable=childSet.init();
childSet.next(currentTable);
childSet.advance(currentTable))
{
//if this is not a table move on to the next node
if(!currentTable.getNodeAnalysis()->getTableAnalysis())
continue;
JBBC * tableJBBC = currentTable.getNodeAnalysis()->getJBBC();
// fact table cannot depend on any other table
if(tableJBBC->getPredecessorJBBCs().entries())
continue;
//compare to currentTable's name
// (case-insensitive match against the CQD value)
Int32 comparison_result = currentTable.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getCorrNameObj().\
getCorrNameAsString().\
compareTo(factTableName,
NAString::ignoreCase);
if(comparison_result == 0)
{
factTable = currentTable;
// get fact table cardinality after application
// of local key prefix preds
factTableCKPrefixCardinality =
appStatMan->
getStatsForLocalPredsOnCKPOfJBBC(factTable)->
getResultCardinality();
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Picked the fact Table specified by user" << endl;
CURRCONTEXT_OPTDEBUG->stream() << "fact Table: " << factTable.getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "fact Table num rows scanned: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(factTableCKPrefixCardinality.value()))\
<< endl;
}
#endif
break;
}
}
}
// the largest table (i.e. greatest data to be read) after application of local
// predicates on prefix of clustering key
CANodeId largestTable = NULL_CA_ID;
CostScalar largestTableTotalDataVol;
CostScalar largestTableCardinality;
#ifdef _DEBUG
// these are for debugging
CostScalar largestTableActualRecordSize(0);
CostScalar largestTableEffectiveRecordSize(0);
#endif //_DEBUG
// second largest table after application of local predicates on prefix of
// clustering key
CANodeId secondLargestTable = NULL_CA_ID;
CostScalar secondLargestTableTotalDataVol;
CostScalar secondLargestTableCardinality;
#ifdef _DEBUG
// these are for debugging
CostScalar secondLargestTableActualRecordSize(0);
CostScalar secondLargestTableEffectiveRecordSize(0);
#endif //_DEBUG
// record sizes are capped at COMP_INT_4 bytes so that one very wide
// table does not dominate the volume comparison
UInt32 recordSizeCap =
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_4);
CostScalar maxRecordSize(recordSizeCap);
CostScalar sumOfTotalDataVol(csZero);
CostScalar numTablesInCurrentMultiJoin(csZero);
UInt32 numChildren = childSet.entries();
// COMP_INT_45: percentage weight applied to the retrieved-data term
CostScalar PAtoScanRatio =
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_45);
// Flag indicating if we got the following info for independent tables i.e.
// tables that are join via inner-non-semi joins
// * numTablesInCurrentMultiJoin
// * sumOfTotalDataVol
NABoolean gotTableStats = FALSE;
//The following loop performs the sorting of the JBBCs in
//this multi-join based on the cardinality after application
//of local predicates on clustering key prefix
// NOTE: this is a selection-style pass — each outer iteration finds
// and removes the current largest child from childSet (a by-value
// copy, so the caller's set is not affected), stopping once both the
// largest and second-largest tables are identified.
for(UInt32 i = 0; i < numChildren; i++)
{
//To start with, pick an arbitrary child as largest
CANodeId largestChild = childSet.init();
childSet.next(largestChild);
if(!largestChild.getNodeAnalysis()->getTableAnalysis())
{
childSet.remove(largestChild);
continue;
}
JBBC * largestChildJBBC = largestChild.getNodeAnalysis()->getJBBC();
Join * largestChildParentJoin = largestChildJBBC->getOriginalParentJoin();
// fact table cannot depend on any other table
if (largestChildParentJoin &&
!(largestChildParentJoin->isInnerNonSemiNonTSJJoin()))
{
childSet.remove(largestChild);
continue;
}
//get cardinality
EstLogPropSharedPtr estLogProps = appStatMan->getStatsForLocalPredsOnCKPOfJBBC(largestChild);
// cardinality of largest child
CostScalar largestChildCardinality = estLogProps->getResultCardinality();
// record size of largest child
CostScalar largestChildRecordSize = largestChild.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getNATable()->
getRecordLength();
estLogProps = appStatMan->getStatsForCANodeId(largestChild);
CostScalar largestChildOutputCard = estLogProps->getResultCardinality();
RowSize largestChildOutputRowSize = largestChild.getNodeAnalysis()->
getOriginalExpr()->
getGroupAnalysis()->
getGroupAttr()->
getRecordLength();
#ifdef _DEBUG
// This is for debugging
CostScalar largestChildActualRecordSize = largestChildRecordSize;
#endif //_DEBUG
// apply the record-size cap (COMP_INT_4)
if(largestChildRecordSize > maxRecordSize)
largestChildRecordSize = maxRecordSize;
#ifdef _DEBUG
// This is for debugging
CostScalar largestChildEffectiveRecordSize = largestChildRecordSize;
#endif //_DEBUG
// total data volume = key-prefix-scan bytes
//                   + PAtoScanRatio% of the retrieved-output bytes
CostScalar largestChildSubsetSize = largestChildCardinality *
largestChildRecordSize;
CostScalar largestChildDataRetrievedSize = largestChildOutputCard *
largestChildOutputRowSize;
CostScalar largestChildTotalDataVol = largestChildSubsetSize +
PAtoScanRatio * largestChildDataRetrievedSize / 100.0;
// inner pass: compare every remaining candidate against the current
// largest, computing its total data volume the same way
for( CANodeId child = largestChild; childSet.next(child); childSet.advance(child))
{
//don't execute the following code if child is not a table
if(!child.getNodeAnalysis()->getTableAnalysis())
continue;
JBBC * childJBBC = child.getNodeAnalysis()->getJBBC();
Join * childParentJoin = childJBBC->getOriginalParentJoin();
// fact table cannot depend on any other table
if (childParentJoin &&
!(childParentJoin->isInnerNonSemiNonTSJJoin()))
continue;
/*
// fact table cannot depend on any other table
if(childJBBC->getPredecessorJBBCs().entries())
continue;
*/
// Current child is a table increment the count
// (only during the first outer pass; see gotTableStats)
if(!gotTableStats)
numTablesInCurrentMultiJoin++;
//get cardinality of child after application of local preds on
//clustering key prefix
estLogProps = appStatMan->getStatsForLocalPredsOnCKPOfJBBC(child);
CostScalar childCardinality = estLogProps->getResultCardinality();
//Record Size of the factTable
CostScalar childRecordSize = child.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getNATable()->
getRecordLength();
#ifdef _DEBUG
CostScalar childActualRecordSize = childRecordSize;
#endif //_DEBUG
if(childRecordSize > maxRecordSize)
childRecordSize = maxRecordSize;
#ifdef _DEBUG
CostScalar childEffectiveRecordSize = childRecordSize;
#endif //_DEBUG
CostScalar childSubsetSize = childCardinality * childRecordSize;
estLogProps = appStatMan->getStatsForCANodeId(child);
CostScalar childOutputCard = estLogProps->getResultCardinality();
RowSize childOutputRowSize = child.getNodeAnalysis()->
getOriginalExpr()->
getGroupAnalysis()->
getGroupAttr()->
getRecordLength();
CostScalar childDataRetrievedSize = childOutputCard * childOutputRowSize;
CostScalar childTotalDataVol = childSubsetSize +
PAtoScanRatio * childDataRetrievedSize / 100.0;
if(!gotTableStats)
sumOfTotalDataVol += childTotalDataVol;
//if this child's TotalDataVol is larger than the largest child's
//TotalDataVol, set this child to be the largest
if( childTotalDataVol >= largestChildTotalDataVol)
{
largestChild = child;
largestChildCardinality = childCardinality;
largestChildTotalDataVol=childTotalDataVol;
#ifdef _DEBUG
// These are for debugging
largestChildActualRecordSize = childActualRecordSize;
largestChildEffectiveRecordSize = childEffectiveRecordSize;
#endif //_DEBUG
}
}
// we got the table stats (not histograms but the stats mentioned above)
gotTableStats = TRUE;
//remove the largest child from the child set
childSet.remove(largestChild);
// second outer pass (largestTable already set): this child is the
// second largest — record it and stop
if ((largestTable != NULL_CA_ID) &&
(largestChild.getNodeAnalysis()->getTableAnalysis()))
{
secondLargestTable = largestChild;
secondLargestTableCardinality = largestChildCardinality;
secondLargestTableTotalDataVol = largestChildTotalDataVol;
#ifdef _DEBUG
// These are for debugging
secondLargestTableActualRecordSize = largestChildActualRecordSize;
secondLargestTableEffectiveRecordSize = largestChildEffectiveRecordSize;
#endif //_DEBUG
break;
}
// get the fact Table
// (first outer pass: record the overall largest table)
if ((largestTable == NULL_CA_ID) &&
(largestChild.getNodeAnalysis()->getTableAnalysis()))
{
largestTable = largestChild;
largestTableCardinality = largestChildCardinality;
largestTableTotalDataVol = largestChildTotalDataVol;
#ifdef _DEBUG
//These are for debugging
largestTableActualRecordSize = largestChildActualRecordSize;
largestTableEffectiveRecordSize = largestChildEffectiveRecordSize;
#endif //_DEBUG
}
}
biggestTable = largestTable;
// Special case: only one qualifying table. It becomes the fact table
// only if it is also the smallest-cardinality (first) subset member in
// the JBB-wide key-prefix ordering; otherwise no fact table is chosen.
if ((secondLargestTable == NULL_CA_ID) &&
(largestTable != NULL_CA_ID))
{
// get CANodeIdSet representing this JBBSubset
CANodeIdSet childSet = getJBBCs();
JBBWA * jbbWA = jbb_->getJBBWA();
const CASortedList * sortedListOfNodes = jbbWA->getNodesSortedByLocalKeyPrefixPredsCard();
for( UInt32 i = 0;
i < sortedListOfNodes->entries();
i++)
{
if(childSet.contains((*sortedListOfNodes)[i]))
{
if((*sortedListOfNodes)[i] == largestTable)
{
factTableCKPrefixCardinality = largestTableCardinality;
factTable = largestTable;
}
break;
}
}
return factTable;
}
CostScalar averageSizeOfAllTablesExceptLargest(csZero);
sumOfTotalDataVol-=largestTableTotalDataVol;
numTablesInCurrentMultiJoin-=1;
if(numTablesInCurrentMultiJoin.isGreaterThanZero())
averageSizeOfAllTablesExceptLargest=
sumOfTotalDataVol / numTablesInCurrentMultiJoin;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Largest Table: " << largestTable.getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Largest Table num rows scanned: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(largestTableCardinality.value()))\
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Largest Table actual rowsize: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(largestTableActualRecordSize.value()))\
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Largest Table effective rowsize: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(largestTableEffectiveRecordSize.value()))\
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Largest Table data scanned: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(largestTableTotalDataVol.value()))\
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Average size of all tables apart from largest: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(averageSizeOfAllTablesExceptLargest.value()))\
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Second Largest Table is: " << secondLargestTable.getText() <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Second Largest Table num rows scanned: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(secondLargestTableCardinality.value()))\
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Second Largest Table actual rowsize: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(secondLargestTableActualRecordSize.value()))\
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Second Largest Table effective rowsize: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(secondLargestTableEffectiveRecordSize.value()))\
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "Second Largest Table data scanned: ";
CURRCONTEXT_OPTDEBUG->stream() << istring(Lng32(secondLargestTableTotalDataVol.value()))\
<< endl;
CURRCONTEXT_OPTDEBUG->stream() << endl;
}
#endif //_DEBUG
UInt32 factTableFactor =
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_2);
//check if the largest table can be considered a fact table
// NOTE(review): presumably CostScalar division guards against a zero
// secondLargestTableTotalDataVol — confirm CostScalar::operator/
// semantics before relying on this with empty second tables.
if (((largestTableTotalDataVol/secondLargestTableTotalDataVol) >= factTableFactor) &&
(factTable == NULL_CA_ID))
{
factTableCKPrefixCardinality = largestTableCardinality;
factTable = largestTable;
}
//if we want to force a star join plan
//This has to be done otherwise the star join pattern is
//not considered a match without a fact table
// NOTE(review): on this path factTableCKPrefixCardinality is NOT set;
// callers appear to use it only when a fact table was found normally.
if (((CmpCommon::getDefault(DIMENSIONAL_QUERY_OPTIMIZATION) == DF_ON) ||
(CmpCommon::getDefault(COMP_BOOL_13) == DF_ON)) &&
(factTable == NULL_CA_ID))
factTable = largestTable;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "FactTable chosen: " << factTable.getText() << endl;
}
#endif //_DEBUG
return factTable;
};//JBBSubsetAnalysis::findFactTable
// Caching wrapper around findFactTable() for this JBBSubset.
// On the first call the fact table and largest table are computed over
// all JBBCs of the subset and cached; subsequent calls return the
// cached values.
// NOTE(review): on the cached path factTableCKPrefixCardinality is not
// refreshed — this matches the original behavior.
CANodeId JBBSubsetAnalysis::findFactAndLargestTable(
  CostScalar & factTableCKPrefixCardinality,
  CANodeId & biggestTable)
{
  // already computed: serve from cache
  if (computedFactAndLargestTable_)
  {
    biggestTable = largestTable_;
    return factTable_;
  }

  // compute over all JBBCs of this subset
  CANodeIdSet subsetNodes = getJBBCs();
  CANodeId fact =
    findFactTable(subsetNodes, factTableCKPrefixCardinality, biggestTable);

  // populate the cache
  largestTable_ = biggestTable;
  factTable_ = fact;
  computedFactAndLargestTable_ = TRUE;

  return fact;
}//JBBSubsetAnalysis::findFactAndLargestTable
CANodeId JBBSubsetAnalysis::computeCenterTable()
{
if(centerTableComputed_)
{
return centerTable_;
}
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = getJBBCs();
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
//n is the number of jbbcs (i.e. tables) in this multijoin
UInt32 n = childSet.entries();
CANodeId center = NULL_CA_ID;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "There are "<<n<<" jbbcs"<<endl;
}
#endif //_DEBUG
// require atleast 3 tables
if (n < 3)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
// variable used in iteration
CANodeId currentTable;
// get a set of single row tables
CANodeIdSet singleRowTables;
// go over all the tables in the query
// and create a set of tables that are
// returning just one row
for(currentTable = childSet.init();
childSet.next(currentTable);
childSet.advance(currentTable))
{
// get number of tables in this JBBC
Int32 numSubtreeTables = currentTable.getNodeAnalysis()
->getJBBC()
->getSubtreeTables().entries();
CostScalar jbbcCardinality = appStatMan->
getStatsForCANodeId(currentTable)->
getResultCardinality();
//currentTable.getNodeAnalysis()->getCardinality();
if((jbbcCardinality == 1) &&
(numSubtreeTables < 2))
singleRowTables.insert(currentTable);
}
n = n - singleRowTables.entries();
if( n < 3)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "SingleRow JBBCs are "<<singleRowTables.getText()<<endl;
}
#endif //_DEBUG
// minimum connectivity for center
UInt32 minimumCenterConnectivity = n-1;
// maximum connectivity for dimensions
UInt32 maximumDimensionConnectivity = 1;
// connectivity of the center / fact table
UInt32 centerConnectivity = 0;
// gets the maximum number of tables connected
// connected to a dimension table
UInt32 maxDimConnectivity = 0;
// figure out the parameters for matching the connectivity pattern
switch (n)
{
case 3:
case 4:
// for n == 4 or n==3
minimumCenterConnectivity = n-1;
maximumDimensionConnectivity = 1;
break;
case 5:
// for n == 5
minimumCenterConnectivity = n-2;
maximumDimensionConnectivity = 2;
break;
default:
// for n >= 6
minimumCenterConnectivity = n-3;
// Changed COMP_INT_0 to COMP_INT_20 to avoid conflict with Nice context changes, with this
// Nice Context feature is OFF by default. (06/07/06)
maximumDimensionConnectivity = (ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_20);
break;
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Pattern match parameters are: "<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Min center connectivity: "<<minimumCenterConnectivity<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Max dimension connectivity: "<<maximumDimensionConnectivity<<endl;
CURRCONTEXT_OPTDEBUG->stream() << endl;
}
#endif //_DEBUG
// get the fact table name if a fact table has been forced
const char* factTableName = ActiveSchemaDB()->getDefaults().
getValue(COMP_STRING_1);
// variable that tells us that the
// table we want to be the fact table
// is found in the multijoin
Int32 centerTableForced = 0;
// The loop below tries to match the connectivity pattern
// iterate over all the jbbcs (i.e. tables)
// in this multijoin
// In each iteration
// Figure out:
// * if this table is the center/fact table
// * if this table is a dimension table
for(currentTable = childSet.init();
childSet.next(currentTable);
childSet.advance(currentTable))
{
// if factTableName is NONE (default value),
// then we don't want to force a particular
// table to be the fact table
if(stricmp(factTableName,"NONE") != 0)
{
//if this is not a table move on to the next node
if(!currentTable.getNodeAnalysis()->getTableAnalysis())
{
// compare to currentTable's name
// if comparison is successful we get a zero
// therefore reverse the result
centerTableForced = !(currentTable.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getCorrNameObj().\
getCorrNameAsString().\
compareTo(factTableName,
NAString::ignoreCase));
// we found a the table that we wanted
// force as the fact table
if (centerTableForced)
{
center = currentTable;
centerConnectivity = center.getNodeAnalysis()->
getJBBC()->
getJoinedJBBCs().
entries();
}
}
}
//all JBBCs connect to current table
CANodeIdSet connectedChildren = currentTable.getNodeAnalysis()->getJBBC()->getJoinedJBBCs();
// get number of tables connected to current jbbc
UInt32 numConnectedChildren = connectedChildren.entries();
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "JBBC: "<<currentTable.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "connected jbbcs: "<<connectedChildren.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << endl;
}
#endif //_DEBUG
// if current jbbc is:
// * connected to at least minimumCenterConnectivity jbbcs
// * has a connectivity greater than the connectivity
// of the current center/fact table
// then set this jbbc to be the center/fact table
if ((numConnectedChildren >= minimumCenterConnectivity) &&
(centerConnectivity < numConnectedChildren) &&
(currentTable.getNodeAnalysis()->getTableAnalysis()) &&
(!centerTableForced))
{
// see we are going have found a new center
// the previous center should be a dimension
// make sure it is not connected to more than
// the allowed number of connected tables for
// dimension tables.
// If the previous center is connected to a
// greater number of tables than allowed for
// a dimension table then fail pattern match
if (centerConnectivity > maximumDimensionConnectivity)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
center = currentTable;
centerConnectivity = numConnectedChildren;
}
else{
// this table is potentially a dimension table
// make sure it is not connected to more than
// the allowed number of tables
// Ignore tables that return 1 row when counting
// the connected tables
UInt32 numNonSingleRowConnectedChildren =
connectedChildren.subtractSet(singleRowTables).entries();
// if the number of connected children is greater than
// the maximum connectivity of any dimension table upto
// this point, then update the maximum connectivity
if (numNonSingleRowConnectedChildren > maxDimConnectivity)
maxDimConnectivity = numNonSingleRowConnectedChildren;
// if this table is connected to more than the maximum
// number of tables allowed for a dimension table
// then fail the pattern match
if ( numNonSingleRowConnectedChildren > maximumDimensionConnectivity)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
}
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
if(center != NULL_CA_ID)
{
CURRCONTEXT_OPTDEBUG->stream() << "Center Table is: "<<center.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center connectivity is: "<<centerConnectivity<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Max dimension connectivity is: "<<maxDimConnectivity<<endl;
CURRCONTEXT_OPTDEBUG->stream() << endl;
}
else{
CURRCONTEXT_OPTDEBUG->stream() << "No center table found "<<center.getText()<<endl;
}
}
#endif //_DEBUG
// we did not find a center table
if (center == NULL_CA_ID)
{
centerTableComputed_ = TRUE;
centerTable_ = NULL_CA_ID;
centerTableConnectivity_ = 0;
maxDimensionConnectivity_ = 0;
return centerTable_;
}
// get rows to be scanned for center table
CostScalar centerTableRowsToBeScanned =
appStatMan->getStatsForLocalPredsOnCKPOfJBBC(center)->
getResultCardinality();
// get record size for center table
CostScalar centerTableRecordSize =
center.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getNATable()->
getRecordLength();
// compute amount of data to be scanned from center table
// amount is in KB
CostScalar centerTableDataToBeScanned =
(centerTableRowsToBeScanned * centerTableRecordSize) / 1024;
// get number of partitions for the center table
CostScalar numCenterTablePartitions =
center.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getClusteringIndex()->
getNAFileSet()->
getCountOfPartitions();
CostScalar sizePerPartition = centerTableDataToBeScanned / numCenterTablePartitions;
centerTableComputed_ = TRUE;
centerTable_ = center;
centerTableConnectivity_ = centerConnectivity;
maxDimensionConnectivity_ = maxDimConnectivity;
centerTableRowsScanned_ = centerTableRowsToBeScanned;
centerTableDataScanned_ = centerTableDataToBeScanned;
centerTablePartitions_ = numCenterTablePartitions;
centerTableDataPerPartition_ = sizePerPartition;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Center Table Rows Scanned: "\
<<centerTableRowsScanned_.getValue()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center Table Record Size: "\
<<centerTableRecordSize.getValue()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center Table Data Scanned: "\
<<centerTableDataScanned_.getValue()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center Table Num partitions: "\
<<centerTablePartitions_.getValue()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Center Table size per partition: "\
<<centerTableDataPerPartition_.getValue()<<endl;
}
#endif //_DEBUG
return centerTable_;
} //JBBSubsetAnalysis::computeCenterTable
// This method, given a fact table, matches a multi-join to a star pattern
NABoolean JBBSubsetAnalysis::isAStarPattern(CANodeId factTable,
CostScalar factTableCKPrefixCardinality)
{
if (CmpCommon::getDefault(COMP_BOOL_88) == DF_ON)
return FALSE;
if(CmpCommon::getDefault(NESTED_JOINS) == DF_OFF)
return FALSE;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "JBBSubsetAnalysis_isAStarPattern_Begin" <<endl;
}
#endif //_DEBUG
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = getJBBCs();
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
//all nodes in this multi-join except the fact table
CANodeIdSet nodesExcludingFactTable = childSet;
nodesExcludingFactTable.remove(factTable);
//temp vars used for iteration
ULng32 i=0;
CANodeId currentNode;
CANodeIdSet currentNodeSet;
CostScalar currentCardinality;
// get the index_desc for the facttable primary index
const IndexDesc * factTableIndexDesc =
factTable.getNodeAnalysis()
->getTableAnalysis()
->getTableDesc()
->getClusteringIndex();
const PartitioningFunction * factTablePartFunc =
factTableIndexDesc
->getPartitioningFunction();
ValueIdSet factTablePartKey;
Int32 factTableNumPartitions = 1;
NABoolean factIsHashPartitioned = FALSE;
if (factTablePartFunc)
{
factTablePartKey =
factTablePartFunc->getPartitioningKey();
factTableNumPartitions =
factTablePartFunc->getCountOfPartitions();
factIsHashPartitioned =
factTablePartFunc->isATableHashPartitioningFunction();
}
//for further processing we'll need the clustering key of the fact table
ValueIdList factTableCK =
factTableIndexDesc->getIndexKey();
// Variables used during iteration
ValueId keyColumn;
CANodeIdSet connectedTables;
CANodeIdSet tablesConnectedToColumn;
CANodeIdSet tablesConnectedViaThisColumn;
Lng32 numPrefixColsCoveredFromFactTableCK = 0;
// this variable is used to get the list of
// fact table clustering key prefix columns
// that are covered by join or local preds
ValueIdList coveredFactTableCKPrefix;
// temp vars to pass into getJBBCsConnectedToPrefixOfList()
ValueIdSet dummyForJoinPreds;
ValueIdSet dummyForLocalPreds;
// get:
// 1. Tables connected to Fact Table via join predicates on clustering key prefix
// 2. Fact Table clustering key prefix columns that have a predicate on them
// 3. Number of fact table clustering key prefix columns covered
connectedTables = factTable.getNodeAnalysis()->
getTableAnalysis()->
getJBBCsConnectedToPrefixOfList(
nodesExcludingFactTable,
factTableCK,
numPrefixColsCoveredFromFactTableCK,
dummyForJoinPreds,
dummyForLocalPreds);
// if a prefix of the fact table clustering key is not covered
// by a join predicate, then return FALSE
if((!connectedTables.entries()))
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Pattern Not Matched, there is no join predicate on prefix of clustering key" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_isAStarPattern_End" <<endl;
}
#endif //_DEBUG
// return FALSE, indicating this rule is not a good match for the current
// query
return FALSE;
}
// this set will be used to keep track of tables
// that are not part of the edges of the star
// schema
CANodeIdSet availableNodes(childSet);
// remove fact table from available nodes
availableNodes-=factTable;
// remove tables directly connected to the fact table
// via join predicates on fact table clustering key
// prefix
availableNodes-=connectedTables;
// set of tables representing the edges of the star from tables connected to the
// prefix of the fact table clustering key
// start by building an edge from the tables connected to the first prefix column
// then add to that the edge from the tables connected to the next prefix column
// and so on....
// once an edge is built it will be inserted in a list of edges (variable
// listOfEdges) The list as evident from the comments above will be ordered by
// the position of the fact table clustering key column through which the edge
// is connected.
CANodeIdSet edge;
// variable used while iterating in the loops below
CANodeIdSet currentEdge;
// Set of all tables in edges that should be joined before fact table
// i.e. rows coming out of this set of tables will be nested joined
// into fact table
// If this set is empty then the star pattern was not matched
CANodeIdSet optimalEdgeSet;
// Cardinality of the set of tables in an edge
CostScalar dataFlowFromEdge;
// Cardinality of the fact table after application of clustering key
// prefix columns after joining with the tables in the set 'edge'.
CostScalar factTableRowsToScan;
// Rows returned from the factTable
CostScalar dataFlowFromFactTable;
// cost of accessing the fact Table as outer most table
CostScalar factTableHashJoinCost;
// compute the cost of accessing the fact Table as the outer most table
factTableHashJoinCost = computeDataAccessCostForTable(csZero,
factTableCKPrefixCardinality,
factTable);
// variable will be used while iterating through the loops below
// cost of nested join of fact table with tables from edge
CostScalar factTableCost;
// variable use to keep track of the lowest cost of a nested join
// on the fact table
//CostScalar lowestFactTableCost(csZero);
CostScalar lowestCombinedCost(csZero);
// This determines how much cheaper a fact table nested should be
// vs hash join / full scan of the fact table, for a plan with a
// nested join on the fact table to be considered feasible.
// Currently the default value is 10, this can be adjusted after
// further testing.
// Remember we are trying to find if a type-I plan is feasible
// If not we'll try a type-II plan (i.e. a plan with a hash
// join on the fact table)
UInt32 factTableCostFactor =
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_3);
// list of edges, where each edge starts from a column connected
// to a key prefix of the fact table. The list is in the order
// of the columns in the key of the fact table
NAList<CANodeIdSet> * listOfEdges =
new (CmpCommon::statementHeap()) NAList<CANodeIdSet> (CmpCommon::statementHeap(),
connectedTables.entries());
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Cost estimate of fact table access as outer most table: "\
<< istring(Lng32(factTableHashJoinCost.value()))\
<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Star Join will make sense if fact table nested join access is "\
<< istring(Lng32(factTableCostFactor))\
<< " times cheaper" << endl;
}
#endif //_DEBUG
//Doing a nested join on the fact table will only make sense if accessing
//fact table as outer most table is factTableCostFactor times as expensive
//as doing a nested join
factTableHashJoinCost = factTableHashJoinCost / factTableCostFactor;
//set of connected tables that have already been used to build edges
CANodeIdSet usedConnectedTables;
UInt32 lookAhead = (ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_18);
// in the loop below we iterate over the columns of the fact table clustering
// key prefix that are covered by either local or join predicates.
// We iterate in order of the column's position in the clustering key.
// In each iteration we try to see if the column has a join pred then
// start an edge from the table connected via the column.
// This is done so that we get a list of edges in an order which would
// allow for ordered probes into the nested join on the fact table
// e.g. consider the following connectivity graph
// fact table cluster key is : a,b,c in that order
//
//
// DIM2
// |
// b
// |
// DIM1-a-FACT-c-DIM3
//
//
// Then we want to get the edges in order starting from the outer most
// i.e. DIM1, DIM2, DIM3
//
// Since we are attempting to get a plan like
//
// NJ
// / \
// / \
// NJ FACT
// / \
// / \
// NJ DIM3
// / \
// / \
// DIM1 DIM2
CANodeIdSet prevEdgeAndFact;
prevEdgeAndFact += factTable;
//iterate over the covered prefix of the clustering key
for (i = 0 ; i < (ULng32) numPrefixColsCoveredFromFactTableCK; i++)
{
// Get the covered prefix on the fact table clustering key
// covered prefix is constituted by columns that have either
// a local or a join predicate on them.
// get a handle to the key column
keyColumn = factTableCK[i];
//get a handle to the column analysis for this column
ColAnalysis * keyColAnalysis = keyColumn.colAnalysis();
// check if this column has join predicates on it
if(!keyColAnalysis->getAllConnectedJBBCs().entries())
{
// if this does not have a join predicate on it
// then it's covered by a local pred. Add it to
// the list of key prefix columns covered.
// It could also be because of a veg this column
// does not have a join rather a local pred e.g.
//
// select count(*)
// from
// fact, dim
// where
// fact.a = dim.a and
// dim.a = 10;
//
// In this case 'fact.a = 10' is implied via a veg
// and the join between fact and dim is essentially
// a cross product after application of local preds
coveredFactTableCKPrefix.insert(keyColumn);
continue;
}
// Get tables from this multi-join (i.e. the multi-join on which we are
// doing the top match) that are connected to the fact table via this
// column
// This will get all tables connected via this column
tablesConnectedViaThisColumn = keyColAnalysis->getAllConnectedJBBCs();
// This will narrow down the set of tables from all connected
// tables to connected tables in this multi-join
tablesConnectedViaThisColumn.intersectSet(connectedTables);
if(!tablesConnectedViaThisColumn.entries())
{
if(usedConnectedTables.entries())
break;
else
return FALSE;
}
//tablesConnectedViaThisColumn should have at least one entry
CCMPASSERT(tablesConnectedViaThisColumn.entries());
/*
if(!tablesConnectedViaThisColumn.entries())
{
if(listOfEdges->entries())
break;
else
return FALSE;
}
*/
// insert this column into list of key prefix columns covered
coveredFactTableCKPrefix.insert(keyColumn);
//get the number of tables connected via this columns
UInt32 numTablesConnectedViaThisColumn =
tablesConnectedViaThisColumn.entries();
// Remove tables that have already been considered in previous iterations
tablesConnectedViaThisColumn.subtractSet(usedConnectedTables);
// get connected tables that have not been considered
// i.e. we have not built an edge starting from them.
UInt32 connectedTablesNotConsidered =
tablesConnectedViaThisColumn.entries();
// there are no connected tables to consider
if(!connectedTablesNotConsidered)
continue;
// if the number of tables connected via this column is greater
// than the number of connected tables not yet considered
if(numTablesConnectedViaThisColumn > connectedTablesNotConsidered)
{
// This would happen in the following scenario
// consider fact table key is a, b, c
// consider column connectivity as below
//
// a b c
// | | |
// DIM1 DIM1 |
// | |
// DIM2 DIM2
//
// Notice here when we come to b, we would have already covered b if
// we started an edge from DIM1 while iterating for column 'a'. Therefore
// b is already covered and we can skip over to column c.
//
// This is a special case but I was worried about the case where all dimension
// tables might be joined to the fact table via a partitioning key column in addition
// to what ever other part of the fact table key they join to.
// This was apparently an MP practice to try to colocate joins in the same partitions
//
// In such a case it would be preferable to get an ordering of DIM1,DIM2 rather
// than DIM2, DIM1 for the probes going into the fact table nested join
// This column is already covered, move to the next one
continue;
}
// the next table to consider for starting an edge
CANodeId tableToConsider = NULL_CA_ID;
CostScalar dataFlowFromTableToConsider = -1;
CostScalar dataFlowAfterIncludingTableToConsider = -1;
if (tablesConnectedViaThisColumn.entries()==1)
{
// get the jbbc in this set
tableToConsider = tablesConnectedViaThisColumn.getFirst();
}
else{
Lng32 prefixCoveredByTableToConsider = 0;
CostScalar tableToConsiderFactTableCost = 0;
CANodeIdSet coveredTables;
// Find the table that gives the max key coverage. Consider the example
// below:
// consider fact table key is a, b, c
// consider column connectivity as below
//
// a b c
// | | |
// DIM1 DIM1 |
// | |
// DIM2 DIM2
//
// When iterating for column 'a' we would like to start an edge
// from table DIM1 rather than DIM2, since 'a' will cover a greater
// key prefix.
for(CANodeId currentTable = tablesConnectedViaThisColumn.init();
tablesConnectedViaThisColumn.next(currentTable);
tablesConnectedViaThisColumn.advance(currentTable))
{
if (!usedConnectedTables.legalAddition(currentTable))
continue;
Lng32 coveredPrefix = 0;
CostScalar dataFlowFromCurrentTable =
appStatMan->
getStatsForCANodeId(currentTable)->
getResultCardinality();
// add currentTable to list of tables that we
// have already determined cover key prefix
// In each iteration:
// 1. Add the current table to this list
// 2. determine the key prefix covered
// 3. Remove current table from this list
// Eventually the table that provides the
// greatest key prefix will be considered
usedConnectedTables.insert(currentTable);
// find the key prefix covered
coveredTables = factTable.getNodeAnalysis()->
getTableAnalysis()->
getJBBCsConnectedToPrefixOfList(
usedConnectedTables,
factTableCK,
coveredPrefix,
dummyForJoinPreds,
dummyForLocalPreds);
//build the edge from the current table
CANodeIdSet currentTableEdge =
extendEdge(currentTable, availableNodes,lookAhead);
//all tables before fact
CANodeIdSet jbbcsBeforeFact = currentTableEdge;
jbbcsBeforeFact += edge;
CostScalar dataFlowAfterIncludingCurrentTable =
appStatMan->
getStatsForCANodeIdSet(jbbcsBeforeFact)->
getResultCardinality();
if((factIsHashPartitioned) &&
(CmpCommon::getDefault(COMP_BOOL_150) == DF_OFF))
{
ValueIdSet factTableColumnsJoined = factTable.getNodeAnalysis()
->getTableAnalysis()
->getColsConnectedViaEquiJoinPreds(jbbcsBeforeFact);
CollIndex numPartKeyCols = factTablePartKey.entries();
CollIndex numJoinedCols = factTableColumnsJoined.entries();
CollIndex numPartKeyColsCovered = 0;
for (ValueId VId = factTablePartKey.init();
factTablePartKey.next(VId);
factTablePartKey.advance(VId) )
{
ItemExpr * ce;
ValueIdSet vIdSet(VId);
if(vIdSet.referencesAConstExpr(&ce))
{
numPartKeyColsCovered++;
continue;
};
ValueIdSet tempJoinedCols = factTableColumnsJoined;
tempJoinedCols.removeCoveredVIdSet(VId);
if(tempJoinedCols.entries() < numJoinedCols)
numPartKeyColsCovered++;
else
break;
}
if(numPartKeyColsCovered < numPartKeyCols)
{
dataFlowAfterIncludingCurrentTable =
dataFlowAfterIncludingCurrentTable * factTableNumPartitions;
}
}
// figure out the number of fact table rows that'll be scanned for this
// nested join
factTableRowsToScan = appStatMan->
getStatsForJoinPredsOnCKOfJBBC(jbbcsBeforeFact, factTable)->
getResultCardinality();
// figure out the rows coming out of the fact table after this nested join
// using joinJBBChildren(prevEdgeAndFact, currentEdge) instead of
// using joinJBBChildren(edge, factTable), in order to drive ASM using
// joins, 'edge' could have cross products and will miss some cardinality
// adjustments because of join with a potential cross product.
dataFlowFromFactTable = appStatMan->
joinJBBChildren(prevEdgeAndFact, currentTableEdge)->
getResultCardinality();
// now cost the nested join
factTableCost = computeDataAccessCostForTable(dataFlowAfterIncludingCurrentTable,
factTableRowsToScan,
factTable);
//put back nodes that were made unavailable
currentTableEdge -= currentTable;
availableNodes += currentTableEdge;
if ( (CmpCommon::getDefault(COMP_BOOL_14) == DF_ON))
{
// if the covered prefix is greater than the
// max prefix we have till now, then consider
// this table to start an edge from.
if((tableToConsider == NULL_CA_ID) ||
(prefixCoveredByTableToConsider < coveredPrefix) ||
((prefixCoveredByTableToConsider) &&
(prefixCoveredByTableToConsider == coveredPrefix) &&
(dataFlowFromTableToConsider > dataFlowFromCurrentTable)))
{
tableToConsider = currentTable;
tableToConsiderFactTableCost = factTableCost;
prefixCoveredByTableToConsider = coveredPrefix;
dataFlowFromTableToConsider = dataFlowFromCurrentTable;
dataFlowAfterIncludingTableToConsider = dataFlowAfterIncludingCurrentTable;
}
}
else{
if ((tableToConsiderFactTableCost == 0) ||
(factTableCost < tableToConsiderFactTableCost))
{
tableToConsider = currentTable;
tableToConsiderFactTableCost = factTableCost;
prefixCoveredByTableToConsider = coveredPrefix;
dataFlowFromTableToConsider = dataFlowFromCurrentTable;
dataFlowAfterIncludingTableToConsider = dataFlowAfterIncludingCurrentTable;
}
}
// remove currentTable from list of tables that we
// have already determined cover key prefix
usedConnectedTables.remove(currentTable);
}
}
// finally we have determined the table that provides for
// the greatest key prefix on the fact table, add it to
// the list of connected tables for which we had built edges
usedConnectedTables.insert(tableToConsider);
//build the edge
currentEdge = extendEdge(tableToConsider, availableNodes,lookAhead);
(*listOfEdges).insert(currentEdge);
edge += currentEdge;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Built an edge starting from table "\
<< tableToConsider.getText()<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "The edge is " \
<< currentEdge.getText()<<endl;
}
#endif
// figure out the rows coming out of the fact table after this nested join
// using joinJBBChildren(prevEdgeAndFact, currentEdge) instead of
// using joinJBBChildren(edge, factTable), in order to drive ASM using
// joins, 'edge' could have cross products and will miss some cardinality
// adjustments because of join with a potential cross product.
dataFlowFromFactTable = appStatMan->
joinJBBChildren(prevEdgeAndFact, currentEdge)->
getResultCardinality();
prevEdgeAndFact += currentEdge;
}
// variable used in iteration
CANodeId connectedTable = NULL_CA_ID;
// if there are still some tables left that were on the key columns
// but were not considered earlier because we were optimizing key
// coverage
if(usedConnectedTables.entries() != connectedTables.entries())
{
// build an edge from all the tables in the unusedConnectedTables set
// since they still have predicates on fact table clustering key prefix
// columns and could provide reduction.
CANodeIdSet unusedConnectedTables(connectedTables);
unusedConnectedTables.subtractSet(usedConnectedTables);
for(connectedTable = unusedConnectedTables.init();
unusedConnectedTables.next(connectedTable);
unusedConnectedTables.advance(connectedTable))
{
//build the edge
currentEdge = extendEdge(connectedTable, availableNodes, lookAhead);
(*listOfEdges).insert(currentEdge);
edge += currentEdge;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Built an edge starting from table "\
<< connectedTable.getText()<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "The edge is " \
<< currentEdge.getText()<<endl;
}
#endif
}
}
// We have the list of edges joined to the fact table in the order
// of the fact table clustering key columns.
// Find the optimal location for the fact table, i.e. we'll try to
// see which tables should be joined below the fact table in a left
// deep join sequence.
//
// e.g. consider the following connectivity graph
// fact table cluster key is : a,b,c in that order
//
//
// DIM2
// |
// b
// |
// DIM1-a-FACT-c-DIM3
//
//
// Then the list of edges will be DIM1, DIM2, DIM3
//
// In the following loop we'll estimate the cost of
// doing a nested join on the fact table after
// 1. DIM1,
// 2. DIM1, DIM2
// 3. DIM1, DIM2, DIM3
//
// Then we compare the optimal cost of doing a nested join
// on the fact table, with the cost of just doing a hash
// join on the fact table which basically involves scanning the
// whole fact table.
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Starting to evaluate optimal location of Fact Table "<< endl;
}
#endif
//cost of fact table nested join after
//maximum possible key prefix coverage
CostScalar costForMaxKeyCoverage=0;
prevEdgeAndFact.clear();
edge.clear();
CostScalar costOfDimProbes(csZero);
double probeHashTableUnitCost =
(double) (ActiveSchemaDB()->getDefaults()).getAsDouble
(MULTI_JOIN_PROBE_HASH_TABLE);
// iterate over list of edges
for ( i = 0; i < (*listOfEdges).entries(); i++)
{
// get the current edge from the list of edges
currentEdge = (*listOfEdges)[i];
//the edge that was used in the last iteration
prevEdgeAndFact = edge;
prevEdgeAndFact += factTable;
// add it to the cumulative edge, i.e. list of
// all tables we are considering to be below the
// fact table
edge += currentEdge;
// get the number of rows coming out of all the tables
// that'll be below the fact table.
// This is the number of probes going into the fact table
// for this nested join
dataFlowFromEdge = appStatMan->
getStatsForCANodeIdSet(edge)->
getResultCardinality();
if((factIsHashPartitioned) &&
(CmpCommon::getDefault(COMP_BOOL_150) == DF_OFF))
{
ValueIdSet factTableColumnsJoined = factTable.getNodeAnalysis()
->getTableAnalysis()
->getColsConnectedViaEquiJoinPreds(edge);
CollIndex numPartKeyCols = factTablePartKey.entries();
CollIndex numJoinedCols = factTableColumnsJoined.entries();
CollIndex numPartKeyColsCovered = 0;
for (ValueId VId = factTablePartKey.init();
factTablePartKey.next(VId);
factTablePartKey.advance(VId) )
{
ItemExpr * ce;
ValueIdSet vIdSet(VId);
if(vIdSet.referencesAConstExpr(&ce))
{
numPartKeyColsCovered++;
continue;
};
ValueIdSet tempJoinedCols = factTableColumnsJoined;
tempJoinedCols.removeCoveredVIdSet(VId);
if(tempJoinedCols.entries() < numJoinedCols)
numPartKeyColsCovered++;
else
break;
}
if(numPartKeyColsCovered < numPartKeyCols)
{
dataFlowFromEdge = dataFlowFromEdge * factTableNumPartitions;
}
}
// figure out the number of fact table rows that'll be scanned for this
// nested join
factTableRowsToScan = appStatMan->
getStatsForJoinPredsOnCKOfJBBC(edge, factTable)->
getResultCardinality();
// figure out the rows coming out of the fact table after this nested join
// using joinJBBChildren(prevEdgeAndFact, currentEdge) instead of
// using joinJBBChildren(edge, factTable), in order to drive ASM using
// joins, 'edge' could have cross products and will miss some cardinality
// adjustments because of join with a potential cross product.
dataFlowFromFactTable = appStatMan->
joinJBBChildren(prevEdgeAndFact, currentEdge)->
getResultCardinality();
// now cost the nested join
factTableCost = computeDataAccessCostForTable(dataFlowFromEdge,
factTableRowsToScan,
factTable);
// capture the cost of the nested join when all edges are joined below the
// fact table.
// This will be the cost for max key coverage after the last iteration.
costForMaxKeyCoverage = factTableCost;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "FactTable after edge " \
<< currentEdge.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "The cummulative edge is "\
<< edge.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Probes into fact table: "\
<< istring(Lng32(dataFlowFromEdge.value()))\
<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Fact Table Rows to scan: "\
<< istring(Lng32(factTableRowsToScan.value()))\
<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Rows coming out of fact table: "\
<< istring(Lng32(dataFlowFromFactTable.value()))\
<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Our cost estimate of fact table nested join: "\
<< istring(Lng32(factTableCost.value()))\
<<endl;
}
#endif //_DEBUG
// if this is the first iteration
// or current nested join cost is cheaper
// than lowest nested join cost
CostScalar combinedCost = costOfDimProbes + factTableCost;
if ((NOT lowestCombinedCost.isGreaterThanZero()) ||
(combinedCost < lowestCombinedCost))
{
lowestCombinedCost = combinedCost;
if (lowestCombinedCost <= factTableHashJoinCost)
{
optimalEdgeSet = edge;
// set the set Of tables to be joined before the fact table,
// this will be used during nextSubstitute for this rule.
nodesJoinedBeforeFactTable_ =
optimalEdgeSet;
// set the optimal location of the fact table in the
// list of edges, this will be used by the nextSubstitute()
// method
optimalFTLocation_ = (Int32) i+1;
lowestFactNJCost_ = lowestCombinedCost;
}
}
#if 0
if ((NOT lowestFactTableCost.isGreaterThanZero()) ||
(factTableCost < lowestFactTableCost))
{
lowestFactTableCost = factTableCost;
if (lowestFactTableCost <= factTableHashJoinCost)
{
optimalEdgeSet = edge;
// set the set Of tables to be joined before the fact table,
// this will be used during nextSubstitute for this rule.
nodesJoinedBeforeFactTable_ =
optimalEdgeSet;
// set the optimal location of the fact table in the
// list of edges, this will be used by the nextSubstitute()
// method
optimalFTLocation_ = (Int32) i+1;
lowestFactNJCost_ = lowestFactTableCost;
}
}
#endif
costOfDimProbes += dataFlowFromEdge * probeHashTableUnitCost;
}
// note the cost for a nested join into fact
// if all the edges are below the fact
costForMaxKeyFactNJ_ = costForMaxKeyCoverage;
//This variable determines if we want all edges of the star
//that join on the key prefix on the Fact Table, to be joined
//below i.e. before the fact table.
NABoolean forceMaxKeyCoverageOnFT = FALSE;
// COMP_BOOL_12 is used to force a plan with max key coverage on
// the fact table, i.e. join all edges covering key prefix column
// below the fact table to get max key coverage for nested join
// on fact table.
// COMP_BOOL_11 is similar to COMP_BOOL_12. It differs from COMP_BOOL_12
// in that it does not force a max key coverage plan if the cost of
// the nested join is higher than the cost of the hash join on the
// fact table.
// COMP_BOOL_13 basically means to force a star join rule plan.
//if we want to force max key prefix coverage on fact table
//this should only be allowed if fact table nested join is
//cheaper than doing a full scan of the fact table (i.e. the
//hash join scenario)
//If COMP_BOOL_11 is 'ON' then we'll force a plan with max key prefix
//coverage only when it is cheaper than the fact table hash join
if ((costForMaxKeyCoverage < factTableHashJoinCost) &&
(CmpCommon::getDefault(COMP_BOOL_11) == DF_ON))
forceMaxKeyCoverageOnFT = TRUE;
//If COMP_BOOL_12 is 'ON' then we'll just force a plan with
//max key prefix coverage on fact table
if (CmpCommon::getDefault(COMP_BOOL_12) == DF_ON)
forceMaxKeyCoverageOnFT = TRUE;
//COMP_BOOL_13 means force star join rule !!!
//therefore if starjoin rule was not found feasible
//just force max key coverage i.e. force all the
//edges joining on FT key prefix columns to be below
//fact table
if (((!optimalEdgeSet.entries()) &&
( (CmpCommon::getDefault(DIMENSIONAL_QUERY_OPTIMIZATION) == DF_ON) ||
(CmpCommon::getDefault(COMP_BOOL_13) == DF_ON) )) &&
(CmpCommon::getDefault(COMP_BOOL_77) != DF_ON))
forceMaxKeyCoverageOnFT = TRUE;
// if we are forcing max key prefix coverage for the
// fact table nested join.
if(forceMaxKeyCoverageOnFT)
{
edge.clear();
// iterate over list of edges
for ( i = 0; i < (*listOfEdges).entries(); i++)
{
currentEdge = (*listOfEdges)[i];
edge += currentEdge;
}
optimalEdgeSet = edge;
// set the set Of tables to be joined before the fact table,
// this will be used during nextSubstitute for this rule.
nodesJoinedBeforeFactTable_ = optimalEdgeSet;
optimalFTLocation_ = (Int32) (*listOfEdges).entries();
}
// from the set of tables in all the edges that join to the key prefix
// take out the set of tables that compose the fringes that should be joined
// before the fact table
edge.subtractSet(optimalEdgeSet);
// add the remaining tables to the available nodes set
// these tables will be joined above/after the fact table
// in the left deep join sequence
availableNodes.addSet(edge);
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "The tables below fact table"\
<< optimalEdgeSet.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "The tables above fact table "\
<< availableNodes.getText()<<endl;
}
#endif //_DEBUG
// nodes to be joined after Fact Table
availableNodes_ = availableNodes;
// save list of edges in the work area it'll be used when by nextSubstitute()
listOfEdges_ = listOfEdges;
// if optimalEdgeSet is empty it could mean
// 1. there were not join predicates on fact table clustering key prefix
// 2. there were join predicates on fact table clustering key prefix but
// nested join on fact table clustering is just too expensive.
// In either case, a type-I nested join plan is not a good idea
if (!optimalEdgeSet.entries())
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Pattern Not Matched, there is no significant reduction on fact table" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "JBBSubsetAnalysis_isAStarPattern_End" <<endl;
}
#endif //_DEBUG
// return FALSE, indicating this rule is not a good match for the current
// query
return FALSE;
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Pattern Matched" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "JBBSubsetAnalysis_isAStarPattern_End" <<endl;
}
#endif //_DEBUG
CostScalar factNJAccessCost = lowestFactNJCost_;
if (CmpCommon::getDefault(COMP_BOOL_12) == DF_ON)
factNJAccessCost = costForMaxKeyFactNJ_;
factTable.getNodeAnalysis()->
getTableAnalysis()->
setFactTableNJAccessCost(factNJAccessCost);
return TRUE;
}
// Rough cost estimate for a nested join probing into a table.
//   probes          - number of probes sent to the table
//   tableRowsToScan - rows of the table that will be scanned
//   table           - node id of the table being probed
// The cost is a weighted sum of probe count, scanned data volume (KB)
// and output data volume; the weights come from the COMP_FLOAT_0/1/2
// defaults.
CostScalar JBBSubsetAnalysis::computeDataAccessCostForTable(
  CostScalar probes,
  CostScalar tableRowsToScan,
  CANodeId table)
{
  // tunable unit costs
  CostScalar unitProbeCost((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_0));
  CostScalar unitSizeCost((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_1));
  CostScalar unitOutputCost((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_2));

  // width of a stored row of the table
  CostScalar storedRowSize = table.getNodeAnalysis()->
                               getTableAnalysis()->
                                 getTableDesc()->
                                   getNATable()->
                                     getRecordLength();

  // volume of data scanned, in kilobytes
  CostScalar scannedKB = (tableRowsToScan * storedRowSize) / 1024;

  // rows the table produces after local predicates (from ASM stats)
  CostScalar rowsReturned = QueryAnalysis::ASM()->
                              getStatsForCANodeId(table)->
                                getResultCardinality();

  // width of a row as returned by the scan node
  CostScalar returnedRowSize = table.getNodeAnalysis()->
                                 getOriginalExpr()->
                                   getGroupAttr()->
                                     getRecordLength();

  return (unitProbeCost * probes) +
         (unitSizeCost * scannedKB) +
         (unitOutputCost * rowsReturned * returnedRowSize);
}
// Extend one "edge" (a chain of tables) of the star, starting at thisTable.
//
//   thisTable      - in:     table the edge starts (or continues) from
//   availableNodes - in/out: JBBCs not yet assigned to any edge; tables
//                            absorbed into this edge are removed
//   lookAhead      - in:     remaining levels we may extend through joins
//                            that are not guaranteed non-expanding
//
// Returns the set of nodes forming the edge (always includes thisTable).
// The edge is grown greedily: among still-available JBBCs joined to
// thisTable, repeatedly pick the one whose join with thisTable has the
// lowest estimated cardinality and recursively extend through it, as long
// as the join does not expand the cardinality beyond a 5% allowance.
//
// Cleanup vs. original: removed the unused local connectedTablesBackup.
CANodeIdSet JBBSubsetAnalysis::extendEdge(CANodeId thisTable,
                                          CANodeIdSet& availableNodes,
                                          UInt32 lookAhead)
{
  // CANodeIdSet representing thisTable
  CANodeIdSet thisTableSet(thisTable);

  // Get a handle to ASM
  AppliedStatMan * appStatMan = QueryAnalysis::ASM();

  // return value
  CANodeIdSet edge;

  // remove this table from the availableNodes if it is there
  if (availableNodes.contains(thisTable))
    availableNodes.remove(thisTable);

  // the edge always includes thisTable
  edge += thisTable;

  if (!availableNodes.entries())
    return edge;

  // consume one level of look-ahead
  if (lookAhead)
    lookAhead -= 1;

  // Tables connected to thisTable that are still available,
  // i.e. not already part of some edge.
  CANodeIdSet connectedTables;
  connectedTables = thisTable.getNodeAnalysis()
                             ->getJBBC()->getJoinedJBBCs();
  connectedTables.intersectSet(availableNodes);

  UInt32 numConnectedTables = connectedTables.entries();

  // cardinality of thisTable after local predicates
  CostScalar thisTableCardinality = appStatMan->
                                      getStatsForCANodeId(thisTable)->
                                        getResultCardinality();

  // temp var to capture the set representing an edge extension
  CANodeIdSet edgeExtension;

  // temp var to get cardinality of edge as it is being built
  CostScalar currentEdgeCardinality;

  // variables used for iteration
  UInt32 i;
  CANodeId connectedTable;
  CANodeIdSet connectedTableSet;

  const CostScalar EXTENSION_ALLOWANCE(1.05); // We allow 5% increase

  // Visit the connected tables in increasing order of the cardinality of
  // their join with thisTable (repeated selection, no explicit sort).
  for (i = 0; i < numConnectedTables; i++)
  {
    CostScalar lowestCardinality = 0;

    // A table from the set of connectedTables that produces the lowest
    // cardinality when joined to thisTable
    CANodeId smallestRemainingTable = NULL_CA_ID;

    for (connectedTable = connectedTables.init();
         connectedTables.next(connectedTable);
         connectedTables.advance(connectedTable))
    {
      // skip candidates whose addition to the edge is not legal
      if (!edge.legalAddition(connectedTable))
        continue;

      // get a CANodeIdSet only containing the connectedTable to estimate
      // join of connectedTable with thisTable.
      connectedTableSet = connectedTable;

      // get cardinality for join of thisTable and connectedTable
      CostScalar joinCardinality =
        appStatMan->joinJBBChildren(thisTableSet, connectedTableSet)
                  ->getResultCardinality();

      if (smallestRemainingTable == NULL_CA_ID)
      {
        lowestCardinality = joinCardinality;
        smallestRemainingTable = connectedTable;
        continue;
      }

      if (joinCardinality < lowestCardinality)
      {
        lowestCardinality = joinCardinality;
        smallestRemainingTable = connectedTable;
      }
    }

    // no legal candidate left in this pass
    if (smallestRemainingTable == NULL_CA_ID)
      continue;

    JBBC * smallestRemainingTableJBBC =
      smallestRemainingTable.getNodeAnalysis()->getJBBC();

    JBBSubset * thisTableJBBSubset =
      thisTableSet.jbbcsToJBBSubset();

    NABoolean isGuaranteedNonExpandingJoin =
      thisTableJBBSubset->
        isGuaranteedNonExpandingJoin(*smallestRemainingTableJBBC);

    // Extend through the candidate only if the join stays within the 5%
    // cardinality allowance AND the join is either guaranteed
    // non-expanding, covered by remaining look-ahead, or the candidate
    // produces at most one row.
    if ((lowestCardinality <= (EXTENSION_ALLOWANCE * thisTableCardinality)) &&
        (isGuaranteedNonExpandingJoin ||
         lookAhead ||
         smallestRemainingTableJBBC->isOneRowMax()))
    {
      // only extend to a table that is still available
      if (availableNodes.contains(smallestRemainingTable))
      {
        edgeExtension = extendEdge(smallestRemainingTable,
                                   availableNodes,
                                   lookAhead);

        // Get cardinality for join of edge and edgeExtension.
        // Not strictly needed, but done to better drive the ASM cache.
        currentEdgeCardinality =
          appStatMan->joinJBBChildren(edge, edgeExtension)
                    ->getResultCardinality();

        edge += edgeExtension;
      }
    }

    // remove the candidate from connectedTables
    connectedTables.remove(smallestRemainingTable);
  }

  return edge;
}
//JBBSubsetAnalysis::extendEdge
// Arrange the tables to be joined after the fact table (AFT) for a
// star-join type-I plan and build the full left-deep join sequence in
// leftDeepJoinSequence_. Preconditions (established by earlier analysis):
// the fact table (factTable_), the edges joined before the fact table
// (nodesJoinedBeforeFactTable_ / listOfEdges_) and the remaining nodes
// (availableNodes_) are already known. AFT tables are ordered greedily by
// the cardinality reduction they give when joined to the growing plan.
void JBBSubsetAnalysis::arrangeTablesAfterFactForStarJoinTypeI()
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "JBBSubsetAnalysis_arrangeTablesAfterFactForStarJoinTypeI_Begin"\
<<endl;
}
#endif //_DEBUG
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
// create the plan
// By this time we should have already done the following:
// 1. figured out the fact table - FT
// 2. matched the star schema pattern
// 3. computed the list of edges to be joined before the fact table - BFT
// 4. the list of tables that should be join after the fact table - AFT
//
// The BFT will be joined in the order the edges are found in
// mjStarJoinIRuleWA->listOfEdges
//
// Following is how the join tree should look like
//
// Join
// / \
// / AFT
// /
// Nested_Join
// / \
// BFT FT
//
// In this method we need to figure out the ordering of the tables in the AFT
// and then produce a plan
CANodeId factTable = factTable_;
// begin - try to figure out the ordering of the tables in AFT
// get the set of tables (BFT + FT).
// we already know the join ordering for this set
CANodeIdSet factTableSet = nodesJoinedBeforeFactTable_;
factTableSet.insert(factTable);
// get the set of table (AFT).
// we don't know the join ordering of these tables except the fact that
// they should be join after the fact table
CANodeIdSet availableNodes = availableNodes_;
#ifdef _DEBUG
//print the right child of the current join
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Table to be joined after fact table "\
<< availableNodes.getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Fact table "\
<< factTable.getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Table to be joined before fact table "\
<< nodesJoinedBeforeFactTable_.getText() << endl;
}
#endif
// this list should contain the available nodes in the order
// they should be joined
NAList<CANodeIdSet> availableNodesOrdered(CmpCommon::statementHeap(),
availableNodes.entries());
// Greedy ordering: on each pass pick the available node that reduces
// (or expands least) the cardinality of the plan built so far.
while (availableNodes.entries())
{
// Node that produces the fewest rows when joined to the factTableSet
CANodeId smallestNode = NULL_CA_ID;
CostScalar lowestReduction = 0;
for( CANodeId availableNode = availableNodes.init();
availableNodes.next(availableNode);
availableNodes.advance(availableNode))
{
JBBC * availableNodeJBBC =
availableNode.getNodeAnalysis()->getJBBC();
if (!factTableSet.legalAddition(availableNode))
continue;
//get cardinality of joining factTableset to availableNode
//to nonAvailableNodes
CostScalar joinReduction =
appStatMan->
computeJoinReduction(factTableSet, availableNode);
// Unless COMP_BOOL_163 is ON, penalize candidates that have no join
// predicate with the plan so far (i.e. cross products), except
// routine joins, so they are picked last.
if ((CmpCommon::getDefault(COMP_BOOL_163) == DF_OFF) &&
!(availableNodeJBBC->getOriginalParentJoin() &&
availableNodeJBBC->getOriginalParentJoin()->isRoutineJoin()))
{
JBBSubset * factTableSetJBBSubset = factTableSet.jbbcsToJBBSubset();
CANodeIdSet availableNodeSet;
availableNodeSet += availableNode;
JBBSubset * availableNodeJBBSubset = availableNodeSet.jbbcsToJBBSubset();
ValueIdSet joinPreds = factTableSetJBBSubset->joinPredsWithOther(*availableNodeJBBSubset);
joinPreds += availableNodeJBBSubset->getPredsWithPredecessors();
if(joinPreds.entries() == 0)
{
joinReduction += 1000000;
}
}
if((smallestNode == NULL_CA_ID) ||
(joinReduction <= lowestReduction))
{
lowestReduction = joinReduction;
smallestNode = availableNode;
}
}
CANodeIdSet smallestAndAttachedNodes = smallestNode;
Join * smallestNodeParentJoin = smallestNode.getNodeAnalysis()->getJBBC()
->getOriginalParentJoin();
NABoolean smallestNodeParentIsInnerNonSemiJoin =
(!smallestNodeParentJoin) || smallestNodeParentJoin->isInnerNonSemiJoin();
if ((CmpCommon::getDefault(COMP_BOOL_49) == DF_OFF) &&
smallestNodeParentIsInnerNonSemiJoin)
// Add nodes connected to smallestNode by join on their key.
smallestAndAttachedNodes += smallestNode.getNodeAnalysis()->
getJBBC()->getJBBCsConnectedViaKeyJoins();
smallestAndAttachedNodes.intersectSet(availableNodes);
// remove the smallest node from the available nodes
availableNodes.remove(smallestAndAttachedNodes);
// add the smallest node to ordered list of available nodes
availableNodesOrdered.insert(smallestAndAttachedNodes);
// update the factTableSet for the next iteration
factTableSet.insert(smallestAndAttachedNodes);
}
// end - try to figure out the ordering of the tables in AFT
// begin - construct left deep join sequence
// The list CANodeIdSets representing the join
// sequence produced by this rule
// Element 0: is inner most / top most element in left deep join sequence
// Element 1: is left most/outer most element in left deep join sequence
UInt32 i = 0;
// first insert tables from AFT
// (reverse order: the last node chosen is the inner-most)
for(i = availableNodesOrdered.entries(); i > 0; i--)
{
leftDeepJoinSequence_.insert(availableNodesOrdered[(i-1)]);
}
// insert the fact table
leftDeepJoinSequence_.insert(factTable);
NAList<CANodeIdSet> * listOfEdges = listOfEdges_;
// insert the tables from the BFT
// tables in the BFT are organized as a list of edges
for(i = optimalFTLocation_; i > 0; i--)
{
leftDeepJoinSequence_.insert((*listOfEdges)[(i-1)]);
}
// Consolidate the fringes
consolidateFringes(factTable, leftDeepJoinSequence_);
// end - construct left deep join sequence
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "JBBSubsetAnalysis_arrangeTablesAfterFactForStarJoinTypeI_End" <<endl;
}
#endif //_DEBUG
}
void JBBSubsetAnalysis::arrangeTablesAfterFactForStarJoinTypeII()
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream()
<< "JBBSubsetAnalysis_arrangeTablesAfterFactForStarJoinTypeII_Begin"
<< endl;
}
#endif //_DEBUG
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
RelExpr * result = NULL;
CANodeId factTable;
if (CmpCommon::getDefault(COMP_BOOL_1) == DF_OFF)
{
factTable = getLargestIndependentNode();
}
else
{
factTable = factTable_;
}
// -----------------------------------------------------
// Begin: Find if there is a full outer join or TSJ node
// -----------------------------------------------------
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = getJBBCs();
CANodeId fullOuterJoinOrTSJNode = NULL_CA_ID;
CANodeId currentNode = NULL_CA_ID;
for(currentNode=childSet.init();
childSet.next(currentNode);
childSet.advance(currentNode))
{
JBBC * currentJBBC = currentNode.getNodeAnalysis()->getJBBC();
if (currentJBBC->isFullOuterJoinOrTSJJBBC() &&
!(currentJBBC->getOriginalParentJoin()))
{
fullOuterJoinOrTSJNode = currentNode;
}
}
// -----------------------------------------------------------------
// End: Find if there is a outerjoin node with large num of tables
// -----------------------------------------------------------------
if(fullOuterJoinOrTSJNode != NULL_CA_ID)
factTable = fullOuterJoinOrTSJNode;
if(factTable == NULL_CA_ID)
factTable = getLargestIndependentNode();
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Using factTable as outermost "\
<< factTable.getText()<< endl;
}
#endif //_DEBUG
CANodeIdSet factTableSet;
factTableSet.insert(factTable);
// Get list of JBBCs in this Multi Join, the list should be sorted by the
// after local pred cardinality of the JBBC
NAList<CANodeId> * sortedJBBCs = getNodesSortedByLocalPredsCard();
// compute list of fringes
//for tracking, which of the JBBCs in the multijoin are still available
CANodeIdSet availableNodes = getJBBCs();
availableNodes.remove(factTable);
// get tables directly connected to Fact Table
CANodeIdSet connectedTables = factTable.getNodeAnalysis()->getJBBC()
->getJoinedJBBCs();
connectedTables += factTable.getNodeAnalysis()->getJBBC()
->getJBBCsRequiringInputFromMe();
// remove connected tables from availableNodes
// this is because we want to make sure only
// one directly connected table per fringer
availableNodes -= connectedTables;
//List of fringes
NAList<CANodeIdSet> listOfFringes(CmpCommon::statementHeap(),30);
CANodeIdSet currentEdge;
UInt32 lookAhead = (ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_18);
for(UInt32 j=(*sortedJBBCs).entries(); j > 0; j--)
{
CANodeId jbbc = (*sortedJBBCs)[(j-1)];
if (connectedTables.contains(jbbc))
{
NABoolean isLegalAddition = factTableSet.legalAddition(jbbc);
if(!isLegalAddition)
{
availableNodes += jbbc;
continue;
}
Join * jbbcParentJoin =
jbbc.getNodeAnalysis()->getJBBC()->getOriginalParentJoin();
currentEdge = jbbc;
CANodeIdSet attachedNodes;
JBBC * nodeJBBC = jbbc.getNodeAnalysis()->getJBBC();
// build a fringe if jbbc not connect via routine join
if((!jbbcParentJoin) ||
(jbbcParentJoin->isInnerNonSemiNonTSJJoin()))
{
currentEdge = extendEdge(jbbc, availableNodes,lookAhead);
// attachedNodes are nodes connected
// to jbbc by join on their key
attachedNodes =
jbbc.getNodeAnalysis()->
getJBBC()->
getJBBCsConnectedViaKeyJoins();
attachedNodes.intersectSet(availableNodes);
}
if (CmpCommon::getDefault(COMP_BOOL_49) == DF_OFF)
{
currentEdge += attachedNodes;
availableNodes -= attachedNodes;
}
listOfFringes.insert(currentEdge);
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Built a fringe starting from table "\
<< jbbc.getText()<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "The fringe is " \
<< currentEdge.getText()<<endl;
}
#endif
}
else{
continue;
}
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Following tables are not part of any fringe, " \
<< "put each of them as a fringe by itself: \n" \
<< availableNodes.getText()<< endl;
}
#endif
// create fringes out of each one of the remaining tables
// i.e. tables that have not been included as part of any
// fringe
for (CANodeId availableNode = availableNodes.init();
availableNodes.next(availableNode);
availableNodes.advance(availableNode))
{
listOfFringes.insert(availableNode);
}
// compute join order
NAList<CANodeIdSet> orderedListOfFringes(STMTHEAP);
orderedListOfFringes.insert(factTableSet);
while (listOfFringes.entries())
{
// fringe that produces lowest cardinality when joined to the factTableSet
CANodeIdSet smallestFringe;
CostScalar lowestReduction = 0;
for (UInt32 i = 0;
i < listOfFringes.entries();
i ++)
{
CANodeIdSet currentFringe = listOfFringes[i];
NABoolean currentFringeIsARoutine = FALSE;
if(currentFringe.entries() == 1)
{
CANodeId fringeNode =
currentFringe.getFirst();
if(!factTableSet.legalAddition(fringeNode))
continue;
JBBC * currentFringeJBBC = fringeNode.getNodeAnalysis()->getJBBC();
if (currentFringeJBBC->getOriginalParentJoin() &&
currentFringeJBBC->getOriginalParentJoin()->isRoutineJoin())
currentFringeIsARoutine = TRUE;
}
// get cardinality fo join factTableSet to currentFringe
CostScalar joinReduction =
appStatMan->
computeJoinReduction(factTableSet, currentFringe);
// xxx kkk
if ((CmpCommon::getDefault(COMP_BOOL_163) == DF_OFF) &&
!currentFringeIsARoutine)
{
JBBSubset * factTableSetJBBSubset = factTableSet.jbbcsToJBBSubset();
JBBSubset * currentFringeJBBSubset = currentFringe.jbbcsToJBBSubset();
ValueIdSet joinPreds = factTableSetJBBSubset->joinPredsWithOther(*currentFringeJBBSubset);
joinPreds += currentFringeJBBSubset->getPredsWithPredecessors();
if(joinPreds.entries() == 0)
{
joinReduction += 1000000;
}
}
if((!smallestFringe.entries()) ||
(joinReduction <= lowestReduction))
{
lowestReduction = joinReduction;
smallestFringe = currentFringe;
}
}
listOfFringes.remove(smallestFringe);
orderedListOfFringes.insert(smallestFringe);
factTableSet.insert(smallestFringe);
}
// join order is not set, the left deep join order going
// from the outer most to the inner most is:
// Fact Table, orderedListOfFringes[0], orderedListOfFringest[1] ....., orderedListOfFringes[n]
// construct the left deep join sequence going from the inner most -> outer most
//
// The list CANodeIdSets representing the join
// sequence produced by this rule
// Element 0: is inner most / top most element in left deep join sequence
// Element N: is left most/outer most element in left deep join sequence
for(UInt32 k = orderedListOfFringes.entries(); k > 0; k--)
{
leftDeepJoinSequence_.insert(orderedListOfFringes[(k-1)]);
}
// Consolidate the fringes
consolidateFringes(factTable, leftDeepJoinSequence_);
}
// Walk the left-deep join sequence from the outer-most element inward
// (elements above/after the fact table only) and merge a sequence element
// into its inner neighbor when the cross product of the two elements is
// estimated to be insignificant compared to the intermediate join result
// they would otherwise produce. This lets small dimension tables be
// joined together (e.g. as a cross product) before joining the big
// intermediate result, instead of one at a time.
//   factTable           - the fact table; elements through the fact table
//                         are never merged
//   leftDeepJoinSequence - in/out: element 0 is the inner-most element,
//                         the last element is the outer-most
void JBBSubsetAnalysis::consolidateFringes(CANodeId factTable,
NAList<CANodeIdSet> &leftDeepJoinSequence)
{
// nothing worth merging in a sequence of fewer than 3 elements
if(leftDeepJoinSequence.entries() < 3)
return;
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
CANodeIdSet factTableSet;
UInt32 i= 0;
// go through the join sequence till the fact table is encountered;
// afterwards i indexes the first element above the fact table and
// factTableSet holds everything joined up to (and including) it
for(i = leftDeepJoinSequence.entries()-1;
!factTableSet.contains(factTable);
i--)
{
factTableSet.insert(leftDeepJoinSequence[i]);
if (i == 0)
break;
}
CostScalar r1 = 0;
// significanceFactor is scaled by 1,000,000 (see the comparison below)
CostScalar significanceFactor =
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_21);
// upper bound on the size of a merged cross product
UInt32 numRowsLimit =
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_14);
// consider merging element j (= i-1, the next element inward) into
// element i
while (i > 0)
{
// r1 = cardinality of joining everything so far with element i
r1 =
appStatMan->
joinJBBChildren(factTableSet,leftDeepJoinSequence[i])->
getResultCardinality();
UInt32 j = i-1;
// if element i is a single JBBC hanging off a non-inner (semi/anti/
// outer/TSJ) join, its position is fixed: finalize it and move on
if (leftDeepJoinSequence[i].entries() == 1)
{
JBBC * iJBBC =
leftDeepJoinSequence[i].getFirst().
getNodeAnalysis()->getJBBC();
Join * iParentJoin =
iJBBC->getOriginalParentJoin();
if(iParentJoin &&
!(iParentJoin->isInnerNonSemiNonTSJJoin()))
{
factTableSet.insert(leftDeepJoinSequence[i]);
i--;
continue;
}
}
// same check for element j: if it cannot be merged, finalize
// element i and move on
if (leftDeepJoinSequence[j].entries() == 1)
{
JBBC * jJBBC =
leftDeepJoinSequence[j].getFirst().
getNodeAnalysis()->getJBBC();
Join * jParentJoin =
jJBBC->getOriginalParentJoin();
if(jParentJoin &&
!(jParentJoin->isInnerNonSemiNonTSJJoin()))
{
factTableSet.insert(leftDeepJoinSequence[i]);
i--;
continue;
}
}
// cardD1D2 estimates the result of combining elements i and j:
// either via a proper join estimate (COMP_BOOL_117 OFF) or as a
// plain product of their cardinalities
CostScalar cardD1D2 = 0;
if ( CmpCommon::getDefault( COMP_BOOL_117 ) == DF_OFF )
cardD1D2 =
appStatMan->
joinJBBChildren(leftDeepJoinSequence[i],leftDeepJoinSequence[j])->
getResultCardinality();
else
{
cardD1D2 =
appStatMan->
getStatsForCANodeIdSet(leftDeepJoinSequence[i])->
getResultCardinality();
cardD1D2 *=
appStatMan->
getStatsForCANodeIdSet(leftDeepJoinSequence[j])->
getResultCardinality();
}
// merge j into i when the combined result is insignificant relative
// to r1 and below the row limit;
// multiplying by 1000000 to save on COMP_FLOATs
if (((r1 * significanceFactor) > (cardD1D2 * 1000000)) &&
!(cardD1D2 > numRowsLimit))
{
leftDeepJoinSequence[i].insert(leftDeepJoinSequence[j]);
leftDeepJoinSequence.removeAt(j);
i--;
}
else
{
// no merge: finalize element i and step inward
factTableSet.insert(leftDeepJoinSequence[i]);
i--;
}
}
}
// Register a matching materialized view for this JBBSubset.
// Preferred matches go to the front of the list so they are tried first.
void JBBSubsetAnalysis::addMVMatch(MVMatchPtr match, NABoolean isPreferred)
{
  if (!isPreferred)
    matchingMVs_.insert(match);           // append at the end
  else
    matchingMVs_.insertAt(0, match);      // prepend
}
// -----------------------------------------------------------------------
// Methods for class JBBWA
// -----------------------------------------------------------------------

// JBBCs ordered by their cardinality after local predicates.
// Computed lazily and cached for the lifetime of this work area.
const CASortedList * JBBWA::getNodesSortedByLocalPredsCard()
{
  if (byLocalPredsCard_)
    return byLocalPredsCard_;

  byLocalPredsCard_ = sort(&JBBWA::getCardinalityAfterLocalPreds);
  return byLocalPredsCard_;
}
// JBBCs ordered by cardinality after local predicates on the prefix of
// the clustering key. Computed lazily and cached.
const CASortedList * JBBWA::getNodesSortedByLocalKeyPrefixPredsCard()
{
  if (byLocalKeyPrefixPredsCard_)
    return byLocalKeyPrefixPredsCard_;

  byLocalKeyPrefixPredsCard_ = sort(&JBBWA::getCardinalityAfterLocalKeyPrefixPreds);
  return byLocalKeyPrefixPredsCard_;
}
// JBBCs ordered by node cardinality. Computed lazily and cached.
const CASortedList * JBBWA::getNodesSortedByCard()
{
  if (byNodeCard_)
    return byNodeCard_;

  byNodeCard_ = sort(&JBBWA::getNodeCardinality);
  return byNodeCard_;
}
// JBBCs ordered by node data size (rowcount * rowsize).
// Computed lazily and cached.
const CASortedList * JBBWA::getNodesSortedByData()
{
  if (byNodeData_)
    return byNodeData_;

  byNodeData_ = sort(&JBBWA::getNodeDataSize);
  return byNodeData_;
}
// JBBCs ordered by output data size (rowcount * output rowsize).
// Computed lazily and cached.
const CASortedList * JBBWA::getNodesSortedByOutputData()
{
  if (byNodeOutputData_)
    return byNodeOutputData_;

  byNodeOutputData_ = sort(&JBBWA::getNodeOutputDataSize);
  return byNodeOutputData_;
}
// Cardinality of a node after applying local predicates on the prefix
// of its clustering key. If logProps comes in NULL, the stats are
// fetched from ASM and handed back to the caller through logProps.
CostScalar
JBBWA::getCardinalityAfterLocalKeyPrefixPreds(CANodeId node,
                                              EstLogPropSharedPtr & logProps)
{
  if (!logProps)
    logProps = QueryAnalysis::ASM()->getStatsForLocalPredsOnCKPOfJBBC(node);

  return logProps->getResultCardinality();
}
// Data size (cardinality * rowsize) of a node after applying local
// predicates on the prefix of its clustering key. If logProps comes in
// NULL, the stats are fetched from ASM and handed back through logProps.
// Returns a negative value when the node is not a base table (no row
// size available).
CostScalar
JBBWA::getDataSizeAfterLocalKeyPrefixPreds(CANodeId node,
                                           EstLogPropSharedPtr & logProps)
{
  if (!logProps)
    logProps = QueryAnalysis::ASM()->getStatsForLocalPredsOnCKPOfJBBC(node);

  // row size is only known for base tables; -1 flags a non-table node
  CostScalar rowSize = -1;
  if (node.getNodeAnalysis()->getTableAnalysis())
    rowSize = node.getNodeAnalysis()->
                getTableAnalysis()->
                  getTableDesc()->
                    getNATable()->
                      getRecordLength();

  return logProps->getResultCardinality() * rowSize;
}
// Cardinality of a node after applying its local predicates.
// If logProps comes in NULL, the stats are fetched from ASM and handed
// back to the caller through logProps.
CostScalar
JBBWA::getCardinalityAfterLocalPreds(CANodeId node,
                                     EstLogPropSharedPtr & logProps)
{
  if (!logProps)
    logProps = QueryAnalysis::ASM()->getStatsForCANodeId(node);

  return logProps->getResultCardinality();
}
// Data size (cardinality * rowsize) of a node after applying its local
// predicates. If logProps comes in NULL, the stats are fetched from ASM
// and handed back through logProps. Returns a negative value when the
// node is not a base table (no row size available).
CostScalar
JBBWA::getDataSizeAfterLocalPreds(CANodeId node,
                                  EstLogPropSharedPtr & logProps)
{
  if (!logProps)
    logProps = QueryAnalysis::ASM()->getStatsForCANodeId(node);

  // row size is only known for base tables; -1 flags a non-table node
  CostScalar rowSize = -1;
  if (node.getNodeAnalysis()->getTableAnalysis())
    rowSize = node.getNodeAnalysis()->
                getTableAnalysis()->
                  getTableDesc()->
                    getNATable()->
                      getRecordLength();

  return logProps->getResultCardinality() * rowSize;
}
// Base (raw) cardinality of a node, i.e. with no predicates applied.
// If logProps comes in NULL, the stats are fetched from ASM (with an
// empty predicate set) and handed back through logProps.
CostScalar JBBWA::getBaseCardinality(CANodeId node,
                                     EstLogPropSharedPtr& logProps)
{
  if (!logProps)
  {
    // empty set => no predicates are to be applied
    ValueIdSet noPreds;
    logProps = QueryAnalysis::ASM()->getStatsForCANodeId(node,
                                                         (*GLOBAL_EMPTY_INPUT_LOGPROP),
                                                         &noPreds);
  }

  return logProps->getResultCardinality();
}
// Base data size of a node (raw rowcount * rowsize), with no predicates
// applied. If logProps comes in NULL, the stats are fetched from ASM
// (with an empty predicate set) and handed back through logProps.
// Returns a negative value when the node is not a base table.
CostScalar JBBWA::getBaseDataSize(CANodeId node,
                                  EstLogPropSharedPtr & logProps)
{
  if (!logProps)
  {
    // empty set => no predicates are to be applied
    ValueIdSet noPreds;
    logProps = QueryAnalysis::ASM()->getStatsForCANodeId(node,
                                                         (*GLOBAL_EMPTY_INPUT_LOGPROP),
                                                         &noPreds);
  }

  // row size is only known for base tables; -1 flags a non-table node
  CostScalar rowSize = -1;
  if (node.getNodeAnalysis()->getTableAnalysis())
    rowSize = node.getNodeAnalysis()->
                getTableAnalysis()->
                  getTableDesc()->
                    getNATable()->
                      getRecordLength();

  return logProps->getResultCardinality() * rowSize;
}
// Cardinality of a node (after local predicates, per ASM stats).
// If logProps comes in NULL, the stats are fetched from ASM and handed
// back to the caller through logProps.
CostScalar JBBWA::getNodeCardinality(CANodeId node,
                                     EstLogPropSharedPtr & logProps)
{
  if (!logProps)
    logProps = QueryAnalysis::ASM()->getStatsForCANodeId(node);

  return logProps->getResultCardinality();
}
// Data size of a node (rowcount * rowsize). If logProps comes in NULL,
// the stats are fetched from ASM and handed back through logProps.
// A negative result indicates the node's record size was unavailable.
CostScalar JBBWA::getNodeDataSize(CANodeId node,
                                  EstLogPropSharedPtr & logProps)
{
  if (!logProps)
    logProps = QueryAnalysis::ASM()->getStatsForCANodeId(node);

  // record (row) size for the node
  CostScalar rowSize = node.getNodeAnalysis()->getRecordSize();
  return logProps->getResultCardinality() * rowSize;
}
// Output data size of a node (rowcount * output row size).
// If logProps comes in NULL, the stats are fetched from ASM and handed
// back to the caller through logProps.
CostScalar JBBWA::getNodeOutputDataSize(CANodeId node,
                                        EstLogPropSharedPtr & logProps)
{
  if (!logProps)
    logProps = QueryAnalysis::ASM()->getStatsForCANodeId(node);

  // width of a row as output by the node's original expression
  CostScalar outputRowSize = node.getNodeAnalysis()->
                               getOriginalExpr()->
                                 getGroupAnalysis()->
                                   getGroupAttr()->
                                     getRecordLength();

  return logProps->getResultCardinality() * outputRowSize;
}
// A generic method to sort given a method to compute the sort
// metric for each node.
// Returns the JBBCs of the parent JBB in DESCENDING order of the metric
// computed by getSortMetric (repeated selection of the largest remaining
// node). The EstLogPropSharedPtr handed to the metric function lets it
// reuse/return the logical properties it computed; it is reset to NULL
// before each call so stale properties are never reused.
// The returned list is allocated on the statement heap.
CASortedList* JBBWA::sort(JBBWA::shortMetricFunc getSortMetric)
{
CANodeIdSet nodesToSort = parentJBB_->getJBBCs();
CASortedList * result = new (CmpCommon::statementHeap())
CASortedList(CmpCommon::statementHeap(), nodesToSort.entries());
// selection sort: on each pass find the remaining node with the
// largest metric, append it, and remove it from the candidate set
while(nodesToSort.entries())
{
// seed the search with the first remaining node
CANodeId largestNode = nodesToSort.init();
// coverity cid 819 flags this a CHECKED_RETURN anomaly. Ignore it.
// coverity[unchecked_value]
nodesToSort.next(largestNode);
EstLogPropSharedPtr estLogProps;
CostScalar largestNodeSortMetric = (*this.*getSortMetric)(largestNode,
estLogProps);
// scan the rest of the candidates for a larger metric
for( CANodeId node = largestNode;
nodesToSort.next(node);
nodesToSort.advance(node))
{
estLogProps = NULL;
CostScalar nodeSortMetric = (*this.*getSortMetric)(node,
estLogProps);
if(nodeSortMetric > largestNodeSortMetric)
{
largestNode = node;
largestNodeSortMetric = nodeSortMetric;
}
}
result->insert(largestNode);
nodesToSort.remove(largestNode);
}
return result;
}
// -----------------------------------------------------------------------
// Methods for class JBB
// -----------------------------------------------------------------------

// Add a JBBC (given by node id) to this JBB and link it back.
void JBB::addJBBC(CANodeId jbbc)
{
  mainJBBSubset_.addJBBC(jbbc);

  // the id is guaranteed to map to a JBBC node (no null check needed)
  JBBC * jbbcNode = jbbc.getNodeAnalysis()->getJBBC();
  jbbcNode->setJBB(this);
}
// Add a JBBC (given directly) to this JBB and link it back.
void JBB::addJBBC(JBBC* jbbc)
{
  jbbc->setJBB(this);
  mainJBBSubset_.addJBBC(jbbc->getId());
}
// Currently not used.
// Set the GB in the JBB (given by node id) and link the GBAnalysis back.
void JBB::setGB(CANodeId gb)
{
  mainJBBSubset_.setGB(gb);

  // gb id must belong to a GBAnalysis node (no null check needed)
  GBAnalysis * analysis = gb.getNodeAnalysis()->getGBAnalysis();
  analysis->setJBB(this);
  gbAnalysis_ = analysis;
}
// Set the GB in the JBB using a GBAnalysis and link it back.
void JBB::setGB(GBAnalysis* gb)
{
  gbAnalysis_ = gb;
  gb->setJBB(this);
  mainJBBSubset_.setGB(gb->getGroupingNodeId());
}
// Record the normalized characteristic inputs of this JBB.
void JBB::setNormInputs(const ValueIdSet & inputs)
{
  normInputs_ = inputs;
}
// Record the normalized characteristic outputs of this JBB.
void JBB::setNormOutputs(const ValueIdSet & outputs)
{
  normOutputs_ = outputs;
}
// Record the input estimated logical properties of this JBB.
void JBB::setInputEstLP(EstLogPropSharedPtr& inEstLP)
{
  inEstLP_ = inEstLP;
}
// JBB Analysis method on Group By:
// capture the GB info on this JBB, then continue analysis with the
// join (or scan) below the group-by.
void JBB::analyze(GroupByAgg* topGBExpr)
{
  // record the group-by analysis on this JBB
  GBAnalysis* gbAnalysis = topGBExpr->getGBAnalysis();
  gbAnalysis->setJBB(this);
  setGB(gbAnalysis);

  // analyze the expression below the GB
  RelExpr* childExpr = (RelExpr*)(topGBExpr->child(0));
  if (childExpr->getOperator().match(REL_ANY_JOIN))
    analyze((Join*)childExpr);
  else if (childExpr->getOperator().match(REL_SCAN))
    analyze((Scan*)childExpr);

  // One potential place to do the R1 and R2 computation for GBAnalysis.
}
// JBB Analysis method for a single JBB whose body is just a scan
// (MVQR only for now): register the scan's JBBC on this JBB.
void JBB::analyze(Scan* topScanExpr)
{
  RelExpr* expr = topScanExpr;

  // Pilot analysis must already have attached a CANode to this scan.
  CMPASSERT(expr->getGroupAnalysis() &&
            expr->getGroupAnalysis()->getNodeAnalysis());

  addJBBC(expr->getGroupAnalysis()->getNodeAnalysis()->getJBBC());
}
// Main JBB Analysis method
// Walks the left-deep spine of joins that can be part of this JBB,
// turning every join's right child (plus the left-most leaf) into a
// JBBC, distributing the accumulated join predicates to the JBBCs
// whose characteristic outputs they reference, and finally recording
// which pairs of JBBCs are directly joined.
void JBB::analyze(Join* topJoinExpr)
{
  RelExpr* expr = topJoinExpr;
  ValueIdSet joinPreds; // accumulation of join selectionPreds
  JBBC* jbbc = NULL;
  // change start
  // Pass 1: gather the selection predicates of every JBB-eligible join
  // on the left-deep spine.
  while (expr->getOperator().match(REL_ANY_JOIN)&&
         ((Join*)expr)->canBePartOfJBB())
  {
    joinPreds += expr->getSelectionPred();
    expr = expr->child(0);
  }
  // predicates left unassigned after pass 2 are "free radicals"
  ValueIdSet freeRadicalPreds = joinPreds;
  expr = topJoinExpr;
  ValueIdSet childCharOutputs;
  // Pass 2: make a JBBC out of each join's right child and assign it
  // the subset of joinPreds that reference its outputs.
  while (expr->getOperator().match(REL_ANY_JOIN)&&
         ((Join*)expr)->canBePartOfJBB())
  {
    // A CANode must have been assigned to this child in pilot analysis
    CMPASSERT(expr->child(1)->getGroupAnalysis() &&
              expr->child(1)->getGroupAnalysis()->getNodeAnalysis());
    jbbc = expr->child(1)->getGroupAnalysis()->getNodeAnalysis()->getJBBC();
    addJBBC(jbbc);
    ValueIdSet jbbcJoinPreds;
    childCharOutputs = expr->child(1)->getGroupAttr()->getCharacteristicOutputs();
    // Soln. 10-100203-7927. If we have NullInstantiate(VEG{that contains a constant})
    // then that veg is not included in child charOutputs. The following IF block
    // is to include such VEGs so that we can figure out the correct join linkages.
    Join * parentJoin = jbbc->getOriginalParentJoin() ;
    if (parentJoin && parentJoin->getOperator().match(REL_ANY_LEFT_JOIN))
      childCharOutputs += (ValueIdSet) parentJoin->nullInstantiatedOutput();
    jbbcJoinPreds.accumulateReferencingExpressions(joinPreds, childCharOutputs);
    jbbc->setJoinPreds(jbbcJoinPreds);
    freeRadicalPreds -= jbbcJoinPreds;
    expr = expr->child(0);
  }
  // do the most left child here
  // A CANode must have been assigned to this child in pilot analysis
  CMPASSERT(expr->getGroupAnalysis() &&
            expr->getGroupAnalysis()->getNodeAnalysis());
  jbbc = expr->getGroupAnalysis()->getNodeAnalysis()->getJBBC();
  addJBBC(jbbc);
  ValueIdSet jbbcJoinPreds;
  childCharOutputs = expr->getGroupAttr()->getCharacteristicOutputs();
  jbbcJoinPreds.accumulateReferencingExpressions(joinPreds, childCharOutputs);
  jbbc->setJoinPreds(jbbcJoinPreds);
  freeRadicalPreds -= jbbcJoinPreds;
  expr = topJoinExpr;
  // Pass 3: hand remaining unassigned ("free radical") predicates back
  // to the left-join children whose selection preds contained them.
  while (expr->getOperator().match(REL_ANY_JOIN)&&
         ((Join*)expr)->canBePartOfJBB())
  {
    if(expr->getOperatorType() == REL_LEFT_JOIN)
    {
      ValueIdSet freePredsToAddBack = freeRadicalPreds;
      freePredsToAddBack.intersectSet(expr->getSelectionPred());
      jbbc = expr->child(1)->getGroupAnalysis()->getNodeAnalysis()->getJBBC();
      ValueIdSet jbbcJoinPreds = jbbc->getJoinPreds();
      jbbcJoinPreds += freePredsToAddBack;
      jbbc->setJoinPreds(jbbcJoinPreds);
    }
    expr = expr->child(0);
  }
  // change end
  /*
  old one
  while (expr->getOperator().match(REL_ANY_JOIN))
  {
  ValueIdSet childCharOutputs =
  expr->child(1)->getGroupAttr()->getCharacteristicOutputs();
  ValueIdSet otherChildCharOutputs =
  expr->child(0)->getGroupAttr()->getCharacteristicOutputs();
  ValueIdSet selectionPreds = expr->getSelectionPred();
  jbbc = expr->child(1)->getGroupAnalysis()->getNodeAnalysis()->getJBBC();
  addJBBC(jbbc);
  // Add any selectionPreds or a (joinPreds that reference child's
  // char output) to joinPreds_ or nonLocalOrPreds_(future) of the JBBC
  // Warning: we are not doing the OR case yet
  ValueIdSet jbbcJoinPreds = selectionPreds;
  jbbcJoinPreds.accumulateReferencingExpressions(joinPreds, childCharOutputs);
  jbbc->setJoinPreds(jbbcJoinPreds);
  joinPreds.removeNonReferencingExpressions(otherChildCharOutputs);
  joinPreds.accumulateReferencingExpressions(selectionPreds,
  otherChildCharOutputs);
  expr = expr->child(0);
  }
  // do the most left child here
  jbbc = expr->getGroupAnalysis()->getNodeAnalysis()->getJBBC();
  addJBBC(jbbc);
  jbbc->setJoinPreds(joinPreds);
  */
  // now that all JBBCs are added and their predicates are set
  // compute the other fields in the JBBC
  // Two JBBCs are considered joined if they share at least one join
  // predicate; record the connection symmetrically on both JBBCs.
  const CANodeIdSet & jbbcs = getJBBCs();
  CANodeId i,j;
  for (i = jbbcs.init(); jbbcs.next(i); jbbcs.advance(i))
  {
    JBBC* jbbci = i.getNodeAnalysis()->getJBBC();
    for (j=i+1; jbbcs.next(j); jbbcs.advance(j))
    {
      JBBC* jbbcj = j.getNodeAnalysis()->getJBBC();
      ValueIdSet commonPreds = jbbci->getJoinPreds();
      commonPreds.intersectSet(jbbcj->getJoinPreds());
      if (commonPreds.entries() > 0)
      {
        jbbci->joinedJBBCs_.insert(j);
        jbbcj->joinedJBBCs_.insert(i);
      }
    }
  }
}
// Analyze inter-JBBC ordering dependencies.
// For every ordered pair (i, j) of distinct JBBCs, mark j a successor
// of i (and i a predecessor of j) when a predicate of i with successors
// intersects a predicate of j with predecessors. Additionally, for a
// routine-join child i, any sibling j whose outputs supply inputs i
// requires becomes a predecessor/input-provider of i.
// (Dead locals charOutputsi and parentJoinj from the original code
// were removed; they were computed but never read.)
void JBB::analyzeChildrenDependencies()
{
  const CANodeIdSet & jbbcs = getJBBCs();
  CANodeId i,j;
  for (i = jbbcs.init(); jbbcs.next(i); jbbcs.advance(i))
  {
    JBBC* jbbci = i.getNodeAnalysis()->getJBBC();
    RelExpr * const expri = i.getNodeAnalysis()->getOriginalExpr();
    GroupAttributes * const attri = expri->getGroupAttr();
    // set inputs required from sibling JBBCs
    // For a routine join child, inputs not satisfied by the JBB's own
    // normalized inputs must come from sibling JBBCs.
    ValueIdSet inputsRequiredFromSiblings;
    if(jbbci->getOriginalParentJoin() &&
       jbbci->getOriginalParentJoin()->isRoutineJoin())
    {
      inputsRequiredFromSiblings = attri->getCharacteristicInputs();
      inputsRequiredFromSiblings -= getNormInputs();
      jbbci->setInputsRequiredFromSiblings(inputsRequiredFromSiblings);
    }
    for (j= jbbcs.init(); jbbcs.next(j); jbbcs.advance(j))
    {
      if(i == j)
        continue;
      JBBC* jbbcj = j.getNodeAnalysis()->getJBBC();
      RelExpr * const exprj = j.getNodeAnalysis()->getOriginalExpr();
      GroupAttributes * const attrj = exprj->getGroupAttr();
      // find if j depends on i
      ValueIdSet predsWithDependencies = jbbci->getPredsWithSuccessors();
      predsWithDependencies.intersectSet(jbbcj->getPredsWithPredecessors());
      if (predsWithDependencies.entries() > 0)
      {
        jbbci->successorJBBCs_.insert(j);
        jbbcj->predecessorJBBCs_.insert(i);
      }
      // if j's outputs cover any input i needs, j must precede i
      if(inputsRequiredFromSiblings.entries())
      {
        ValueIdSet charOutputsj = attrj->getCharacteristicOutputs();
        if(charOutputsj.intersectSet(inputsRequiredFromSiblings).entries())
        {
          jbbci->predecessorJBBCs_.insert(j);
          jbbci->jbbcsProvidingInput_.insert(j);
          jbbcj->successorJBBCs_.insert(i);
          jbbcj->jbbcsRequiringInputFromMe_.insert(i);
        }
      }
    }
  }
}
// For each JBBC whose original parent join is a left join, compute its
// "filter" predicates: join predicates that do not connect the JBBC to
// any sibling JBBC, plus connecting predicates that are fully covered
// by the JBBC's own subset. These must be evaluated as filters on top
// of the left join rather than as join predicates.
void JBB::computeLeftJoinFilterPreds()
{
  const CANodeIdSet & jbbcs = getJBBCs();
  CANodeId i;
  for (i = jbbcs.init(); jbbcs.next(i); jbbcs.advance(i))
  {
    JBBC* jbbci = i.getNodeAnalysis()->getJBBC();
    // only children of left joins carry filter preds
    if(!jbbci->parentIsLeftJoin())
      continue;
    // single-node subset containing just this JBBC
    CANodeIdSet jbbciIdSet;
    jbbciIdSet.insert(i);
    JBBSubset * jbbciSubset = jbbciIdSet.jbbcsToJBBSubset();
    // the rest of the JBB, i.e. all siblings of this JBBC
    CANodeIdSet othersIdSet(getJBBCs());
    othersIdSet -= i;
    JBBSubset * othersJBBSubset = othersIdSet.jbbcsToJBBSubset();
    // predicates that actually connect this JBBC to some sibling
    ValueIdSet predsWithOthers =
      jbbciSubset->joinPredsWithOther(*othersJBBSubset);
    // start with the preds that reference no sibling at all
    ValueIdSet leftJoinFilterPreds = jbbci->getJoinPreds();
    leftJoinFilterPreds -= predsWithOthers;
    // compute predicates that are covered by the JBBC's output.
    // NOTE: the single-node subset for i is loop-invariant, so reuse
    // jbbciSubset instead of rebuilding an identical subset on every
    // predicate iteration (as the original code did).
    ValueIdSet predsCoveredByJBBC;
    for(ValueId joinPred = predsWithOthers.init();
        predsWithOthers.next(joinPred);
        predsWithOthers.advance(joinPred))
    {
      if(jbbciSubset->coversExpr(joinPred))
        predsCoveredByJBBC += joinPred;
    }
    leftJoinFilterPreds += predsCoveredByJBBC;
    // set the left join filter preds
    jbbci->setLeftJoinFilterPreds(leftJoinFilterPreds);
  }
}
// Intentionally a no-op: the entire implementation below is commented
// out. Kept for reference; it sketched how to partition the JBBCs into
// connected subgraphs with and without following successor/predecessor
// (left/semi/routine join) relationships.
void JBB::computeChildrenSubGraphDependencies()
{
  /*
  CANodeIdSet jbbcs = getJBBCs();
  //SubGraphs produced by excluding nodes that are not innerNonSemiNonTSJ
  CANodeIdSet subGraph = jbbcs;
  subGraph.intersectSet(QueryAnalysis::Instance()->getInnerNonSemiNonTSJJBBCs());
  CANodeIdSet jbbcsNotInSubGraph = jbbcs;
  jbbcsNotInSubGraph -= subGraph;
  NAList<CANodeIdSet*>* innerJoinedJBBCsConnectedSubGraphs =
  subGraph.jbbcsToJBBSubset()->getJBBSubsetAnalysis()->getConnectedSubgraphs();
  int numInnerJoinedJBBCsConnectedSubGraphs =
  innerJoinedJBBCsConnectedSubGraphs->entries();
  // SubGraphs produced by just following inner joins, i.e. not traversing
  // successor/predecessor relationships (i.e. left, semi, routine joins)
  NABoolean followSuccessors = FALSE;
  NAList<CANodeIdSet*>* innerJoinConnectedSubGraphs =
  jbbcs.jbbcsToJBBSubset()->getJBBSubsetAnalysis()->getConnectedSubgraphs(followSuccessors);
  int numInnerJoinConnectedSubGraphs =
  innerJoinConnectedSubGraphs->entries();
  */
}
// A mandatory cross product exists when the main JBBSubset splits into
// more than one connected subgraph.
NABoolean JBB::hasMandatoryXP() const
{
  NAList<CANodeIdSet*>* subgraphs =
    getMainJBBSubset().getJBBSubsetAnalysis()->getConnectedSubgraphs();
  if (!subgraphs)
    return FALSE;
  return (subgraphs->entries() > 1);
}
// Produce a Graphviz (DOT) fragment describing the JBB's join graph:
// one node statement per JBBC, labeled with its name and cardinality
// estimates (base table, after local preds on clustering-key prefix,
// after all local preds), and one undirected edge per joined JBBC pair
// labeled with three join-cardinality estimates. usedJbbcs tracks the
// JBBCs already emitted so each edge appears only once.
// (Dead locals emptySet and xy from the original code were removed;
// they were constructed but never read.)
const NAString JBB::graphDisplay(const QueryAnalysis* qa) const
{
  NAString result = "";
  CANodeIdSet jbbcs = mainJBBSubset_.getJBBCs();
  CANodeIdSet usedJbbcs_;
  AppliedStatMan * appStatMan = qa->getASM();
  for (CANodeId x= jbbcs.init(); jbbcs.next(x); jbbcs.advance(x) )
  {
    // neighbors of x that have not been emitted yet
    CANodeIdSet cons = x.getNodeAnalysis()->getJBBC()->getJoinedJBBCs();
    cons -= usedJbbcs_;
    NAString name;
    if (x.getNodeAnalysis()->getTableAnalysis())
    {
      name = x.getNodeAnalysis()->getTableAnalysis()->getTableDesc()
               ->getNATable()->getTableName().getObjectName();
    }
    else
    {
      name = "JBBC:#" + istring(x);
    }
    CostScalar cardx0 = 0;
    CostScalar cardx1 = 0;
    TableAnalysis* tableAn = x.getNodeAnalysis()->getTableAnalysis();
    if (tableAn)
    {
      cardx0 = tableAn->getCardinalityOfBaseTable();
      cardx1 = tableAn->getCardinalityAfterLocalPredsOnCKPrefix();
    }
    CostScalar cardx2 = x.getNodeAnalysis()->getCardinality();
    ULng32 intx0 = (Int32) cardx0.value();
    ULng32 intx1 = (Int32) cardx1.value();
    ULng32 intx2 = (Int32) cardx2.value();
    result += istring(x) + " [label =\"" + name + ":\\n" + istring(intx0)
              + "\\n" + istring(intx1) + "\\n" + istring(intx2) + "\"];\n";
    for (CANodeId y= cons.init(); cons.next(y); cons.advance(y))
    {
      // result unused, but the call may warm the ASM's stats cache for
      // y — kept for safety; TODO confirm whether it can be dropped.
      CostScalar cardy = appStatMan->getStatsForCANodeId(y)->getResultCardinality();
      CANodeIdSet xSet = CANodeIdSet(x);
      CANodeIdSet ySet = CANodeIdSet(y);
      CostScalar cardxy = appStatMan->joinJBBChildren(xSet, ySet)
                            ->getResultCardinality();
      CostScalar xcardy = appStatMan->getStatsForJoinPredsOnCKOfJBBC(xSet,y)
                            ->getResultCardinality();
      CostScalar ycardx = appStatMan->getStatsForJoinPredsOnCKOfJBBC(ySet,x)
                            ->getResultCardinality();
      ULng32 ixy = (Int32) cardxy.value();
      ULng32 ixjy = (Int32) xcardy.value();
      ULng32 iyjx = (Int32) ycardx.value();
      result += istring(x) + " -- " + istring(y) + " [label =\""
                + istring(ixjy) + " , " + istring(ixy) + " , " + istring(iyjx) + "\"];\n";
    }
    usedJbbcs_ += x;
  }
  return result;
}
// Produce a multi-line textual dump of the JBB: its id, its main
// JBBSubset, each JBBC's text, and — when present — the group-by
// analysis including the unparsed group-by item expressions.
const NAString JBB::getText() const
{
  NAString result("JBB # ");
  result += istring(jbbId_) + " :\n";
  result += "mainJBBSubset_ :" + mainJBBSubset_.getText();
  CANodeIdSet jbbcs = mainJBBSubset_.getJBBCs();
  for (CANodeId x= jbbcs.init(); jbbcs.next(x); jbbcs.advance(x) )
  {
    result += x.getNodeAnalysis()->getJBBC()->getText();
  }
  // add group by analysis here
  if (gbAnalysis_) {
    result += "\nGBAnalysis: \n";
    result += gbAnalysis_->getText();
    result += "\n";
    GroupByAgg *gba = gbAnalysis_->getOriginalGBExpr();
    ValueIdSet gbvis = gba->groupExpr();
    result += "GBItem Expression:\n";
    // unparse each grouping expression directly into the result buffer
    for (ValueId gbvid = gbvis.init(); gbvis.next(gbvid); gbvis.advance(gbvid)) {
      ItemExpr *gbie = gbvid.getItemExpr();
      gbie->unparse(result, OPTIMIZER_PHASE, MVINFO_FORMAT);
      result += "\n";
    }
  }
  return result;
}
// Debugging aid: dump the JBB via print() with its default arguments.
void JBB::display() const
{
  print();
}
// Print the JBB's textual description to the given file stream.
// The text is passed as an argument to a "%s" format rather than as
// the format string itself: the original fprintf(f, getText()) treated
// dynamic text as a format string, so any '%' in the dump would be
// misinterpreted (classic format-string defect).
// NOTE(review): prefix/suffix are accepted for interface compatibility
// but, as in the original code, are not emitted.
void JBB::print (FILE *f,
                 const char * prefix,
                 const char * suffix) const
{
  fprintf (f, "%s", getText().data());
}
// -----------------------------------------------------------------------
// Methods for class ColAnalysis
// -----------------------------------------------------------------------
// Return a printable description of this ColAnalysis: the column's
// ValueId, its unparsed expression text, and its VEG and referencing
// predicate sets.
// (The original declared an unused local "Int32 i"; it was removed.)
const NAString ColAnalysis::getText() const
{
  NAString result("ColAnalysis # ");
  result += istring(column_) + ":\n";
  NAString unparsed(CmpCommon::statementHeap());
  column_.getItemExpr()->unparse(unparsed); // expression as ascii string
  result += unparsed + "\n";
  result += "vegPreds: " + valueIdSetGetText(vegPreds_) + "\n";
  result += "referencingPreds: " + valueIdSetGetText(referencingPreds_) + "\n";
  return result;
}
// Complete the column's analysis once all predicates are known:
// compute the set of columns connected to this one through its VEG
// predicates (excluding columns of the owning table), then derive the
// set of node ids for the tables those connected columns belong to.
void ColAnalysis::finishAnalysis()
{
  connectedCols_.clear();
  // union of VEG-connected columns over all of this column's VEG preds
  for (ValueId pred = vegPreds_.init();
       vegPreds_.next(pred);
       vegPreds_.advance(pred))
  {
    connectedCols_ += pred.predAnalysis()->getVegConnectedCols();
  }
  // remove self and cols from same table
  connectedCols_ -= table_->getUsedCols();
  // Now compute connectedTables_;
  connectedTables_.clear();
  for (ValueId c= connectedCols_.init();
       connectedCols_.next(c);
       connectedCols_.advance(c) )
  {
    connectedTables_ += c.colAnalysis()->
      getTableAnalysis()->getNodeAnalysis()->getId();
  }
  // assert self is not there
  return;
}
// Currently not used
// Return the VEG predicates shared between this column and the column
// identified by the given ValueId.
ValueIdSet ColAnalysis::getConnectingVegPreds(ValueId other)
{
  ValueIdSet shared(other.colAnalysis()->getVegPreds());
  shared.intersectSet(vegPreds_);
  return shared;
}
// find all local preds referencing this column
// I am considering caching this localPred xxx
const ValueIdSet ColAnalysis::getLocalPreds() const
{
  ValueIdSet preds(referencingPreds_);
  preds.intersectSet(table_->getLocalPreds());
  return preds;
}
// This code was being called from LargeScopeRule.cpp method
// MJStarJoinIRule::isAStarPattern but that code is no longer used
// Get all sibling JBBCs that are joined to this table on this column
// (via VEG predicates).
CANodeIdSet ColAnalysis::getConnectedJBBCs()
{
  // Must be JBBC
  JBBC* myJBBC = table_->getNodeAnalysis()->getJBBC();
  CMPASSERT(myJBBC);
  CANodeIdSet jbbcs(myJBBC->getJoinedJBBCs());
  CANodeIdSet myJoinedJBBCs;
  CANodeId i;
  // keep only the joined JBBCs whose join preds overlap this column's
  // VEG predicates
  for (i = jbbcs.init(); jbbcs.next(i); jbbcs.advance(i))
  {
    ValueIdSet commonPreds = i.getNodeAnalysis()->getJBBC()->getJoinPreds();
    // xxx for now we only consider veg preds
    // In the future we may include range and non-Veg-equality as well (still not decided)
    commonPreds.intersectSet(vegPreds_);
    if (commonPreds.entries() > 0)
    {
      myJoinedJBBCs.insert(i);
    }
  }
  return myJoinedJBBCs;
}
// Get all sibling JBBCs that are joined to this table on this column
// these include JBBCs joined via non_VEG preds like t1.a = t2.b + 7
CANodeIdSet ColAnalysis::getAllConnectedJBBCs()
{
  // Must be JBBC
  JBBC* myJBBC = table_->getNodeAnalysis()->getJBBC();
  CMPASSERT(myJBBC);
  CANodeIdSet jbbcs(myJBBC->getJoinedJBBCs());
  CANodeIdSet myJoinedJBBCs;
  CANodeId i;
  for (i = jbbcs.init(); jbbcs.next(i); jbbcs.advance(i))
  {
    // equality-connected JBBCs qualify directly (non-VEG equalities)
    if(equalityConnectedJBBCs_.contains(i))
    {
      myJoinedJBBCs.insert(i);
      continue;
    }
    // otherwise require overlap between the JBBC's join preds and this
    // column's VEG preds
    ValueIdSet commonPreds = i.getNodeAnalysis()->getJBBC()->getJoinPreds();
    commonPreds.intersectSet(vegPreds_);
    if (commonPreds.entries() > 0)
    {
      myJoinedJBBCs.insert(i);
    }
  }
  return myJoinedJBBCs;
}
// This is no longer used. Seems to have been replaced by getAllConnectingPreds
// Get connecting Veg Preds between me and this JBBC.
ValueIdSet ColAnalysis::getConnectingPreds(JBBC* jbbc)
{
  // xxx for now we only consider veg preds
  // In the future we may include range and non-Veg-equality as well (still not decided)
  ValueIdSet result(vegPreds_);
  result.intersectSet(jbbc->getJoinPreds());
  return result;
}
// Get connecting Veg Preds between me and this JBBC.
// Also includes this column's equality-connecting predicates that the
// owning table records as connecting to the given JBBC.
ValueIdSet ColAnalysis::getAllConnectingPreds(JBBC* jbbc)
{
  ValueIdSet commonPreds = jbbc->getJoinPreds();
  // xxx for now we only consider veg preds
  // In the future we may include range and non-Veg-equality as well (still not decided)
  ValueIdSet connectingPreds = vegPreds_;
  if(equalityConnectedJBBCs_.contains(jbbc->getId()))
  {
    // add each equality-connecting pred that the table maps to this JBBC
    for (ValueId pred = equalityConnectingPreds_.init();
         equalityConnectingPreds_.next(pred);
         equalityConnectingPreds_.advance(pred))
    {
      const CANodeIdSet * jbbcsConnectedViaPred =
        table_->getEqualityConnectedJBBCs(pred);
      if(jbbcsConnectedViaPred &&
         jbbcsConnectedViaPred->contains(jbbc->getId()))
        connectingPreds += pred;
    }
  }
  // keep only the candidates that are actually join preds of the JBBC
  commonPreds.intersectSet(connectingPreds);
  return commonPreds;
}
// ****************************************************************
// this will be changed to a getText method for ValueIdSet
// Render the members of a ValueIdSet as a brace-enclosed list of
// unparsed expressions, e.g. "ValueIdSet {A B }". Members without an
// ItemExpr are skipped.
// (The original declared an unused local "Int32 i"; it was removed.)
NAString valueIdSetGetText(const ValueIdSet & set)
{
  NAString result("ValueIdSet {");
  for (ValueId x = set.init(); set.next(x); set.advance(x))
  {
    NAString unparsed(CmpCommon::statementHeap());
    if (x.getItemExpr())
    {
      x.getItemExpr()->unparse(unparsed); // expression as ascii string
      result += unparsed;
      result += " ";
    }
  }
  result += "}";
  return result;
}
// FALSE does not mean column is not a constant. It means we did not verify
// that it is a constant.
// Scan this column's local predicates for a VEG predicate whose VEG
// also contains a constant; on success return TRUE and hand the
// constant's ItemExpr back through cv. When refAConstValue is TRUE
// only actual constant values qualify; otherwise constant expressions
// qualify as well.
NABoolean ColAnalysis::getConstValue(ItemExpr* & cv,
                                     NABoolean refAConstValue) const
{
  ValueIdSet localPreds(getLocalPreds());
  // iterate over ValueIdSet to find a VEGPred
  for( ValueId vid = localPreds.init();
       localPreds.next(vid);
       localPreds.advance(vid))
  {
    const ItemExpr * ie = vid.getItemExpr();
    if (ie->getOperatorType() == ITM_VEG_PREDICATE)
    {
      VEG *veg = ((VEGPredicate *)ie)->getVEG();
      ValueIdSet vegMembers = veg->getAllValues();
      // See if the VEG group contains a constant.
      // NOTE: A VEG group should not contain more than 1 constant; but if
      // that should happen, referencesAConstValue will return the first
      // constant found.
      if (refAConstValue)
      {
        if (vegMembers.referencesAConstValue(&cv))
        {
          return TRUE;
        }
      }
      else
      {
        if (vegMembers.referencesAConstExpr(&cv))
        {
          return TRUE;
        }
      }
    }
  }
  return FALSE;
}
// Convert an unsigned 32-bit integer to its decimal NAString form.
NAString istring (ULng32 i)
{
  // 20 chars comfortably holds any 32-bit decimal plus terminator
  char digits[20];
  str_itoa(i, digits);
  NAString text(digits);
  return text;
}
// Return the JBBSubsetAnalysis of the main JBBSubset of the JBB this
// group-by heads, or NULL when the group-by has no GBAnalysis or the
// GBAnalysis is not attached to a JBB.
JBBSubsetAnalysis* GroupByAgg::getJBBSubsetAnalysis()
{
  GBAnalysis* gbAnalysis = getGBAnalysis();
  if (!gbAnalysis)
    return NULL;
  JBB* containingJBB = gbAnalysis->getJBB();
  if (!containingJBB)
    return NULL;
  return containingJBB->getMainJBBSubset().getJBBSubsetAnalysis();
}
// Return the JBBSubsetAnalysis for the JBBSubset formed by this scan's
// subtree tables, or NULL when any link in the chain (group attributes,
// group analysis, subtree tables, subset, owning JBB) is missing.
JBBSubsetAnalysis* Scan::getJBBSubsetAnalysis()
{
  GroupAttributes *groupAttr = getGroupAttr();
  if (!groupAttr)
    return NULL;
  GroupAnalysis *groupAnal = groupAttr->getGroupAnalysis();
  if (!groupAnal)
    return NULL;
  if (!groupAnal->getAllSubtreeTables().entries())
    return NULL;
  JBBSubset *jbbSubset = groupAnal->getAllSubtreeTables().jbbcsToJBBSubset();
  if (!jbbSubset || !jbbSubset->getJBB())
    return NULL;
  return jbbSubset->getJBBSubsetAnalysis();
}
// ****************************************************************