blob: 74c85f58312779ae192a91baaffed81eaf34a77b [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: LargeScopeRules.cpp
* Description: Large Scope Rules classes and methods
*
*
* Created: 6/10/2003
* Language: C++
*
*
*
*
******************************************************************************
*/
#include "GroupAttr.h"
#include "TransRule.h"
#include "Analyzer.h"
#include "MultiJoin.h"
#include "AppliedStatMan.h"
#include "opt.h"
extern THREAD_P NAUnsigned MJEnumRuleNumber;
extern THREAD_P NAUnsigned MJStarJoinIRuleNumber;
extern THREAD_P NAUnsigned MJStarJoinIIRuleNumber;
extern THREAD_P NAUnsigned RoutineJoinToTSJRuleNumber;
// Excluding MJExpandRule related code since it is there for testing purposes
// MJExpandRule is not exercised during normal execution
// -----------------------------------------------------------------------
// methods for MJExpandRule
// -----------------------------------------------------------------------
NABoolean MJExpandRule::topMatch (RelExpr * expr,
Context * context)
{
if (NOT expr->getOperator().match(REL_MULTI_JOIN))
return FALSE;
// Get a handle to the MultiJoin on which we are performing the topMatch()
MultiJoin* mjoin = (MultiJoin*) expr;
// should we enumrate the user specified join order in pass 2 among other
// join orders
// Return FALSE if any one of the following is FALSE:
// * check if CQD is ON
// * check if optimization pass 2
// * check if this is the top MultiJoin, i.e. MultiJoin that represents the
// whole JBB
if (CmpCommon::getDefault(MULTI_JOIN_CONSIDER_INITIAL_JOIN_ORDER) == DF_OFF)
return FALSE;
if(GlobalRuleSet->getCurrentPassNumber() != 2)
return FALSE;
if(mjoin->getJBBSubset().getJBBCs() !=
mjoin->getJBBSubset().getJBB()->getJBBCs())
return FALSE;
return TRUE;
}
RelExpr * MJExpandRule::nextSubstitute(RelExpr * before,
Context *,
RuleSubstituteMemory * &memory)
{
MultiJoin * mjoin = (MultiJoin *) before;
// Make sure GroupAnalysis localJBBView is the same as mjoin's JBBSubset.
CMPASSERT(mjoin->getJBBSubset() ==
*(mjoin->getGroupAttr()->getGroupAnalysis()->getLocalJBBView()));
NABoolean fixJoinOrder = TRUE;
NABoolean createPriviledgedJoins = TRUE;
Join* result = mjoin->leftLinearize(fixJoinOrder, createPriviledgedJoins);
// synth the join
result->synthLogProp();
// synth the left child too
result->child(0)->synthLogProp();
return result;
}
Int32 MJExpandRule::promiseForOptimization (RelExpr * expr,
Guidance *,
Context *)
{
// should be higher than the promise for MJEnumRule
return DefaultTransRulePromise + 1;
}
// -----------------------------------------------------------------------
// methods for MVQRRule
// -----------------------------------------------------------------------
NABoolean MVQRRule::topMatch (RelExpr * expr,
Context * context)
{
// For optimization levels below the medium level we do not run the MVQRRule
if (CURRSTMT_OPTDEFAULTS->optLevel() < OptDefaults::MEDIUM)
return FALSE;
// rule is disabled
if (CmpCommon::getDefaultLong(MVQR_REWRITE_LEVEL) < 1)
return FALSE;
if (NOT expr->getOperator().match(REL_MULTI_JOIN))
return FALSE;
// conditions:
// 1- do we have any matching mvs
// 2- have we already processed them
if ( (((MultiJoin *) expr)->getJBBSubset().getJBBSubsetAnalysis()->getMatchingMVs().entries()) &&
!(((MultiJoin *) expr)->getJBBSubset().getJBBSubsetAnalysis()->getMatchingMVs()[0]->alreadyOptimized())
)
return TRUE;
return FALSE;
}
RelExpr * MVQRRule::nextSubstitute(RelExpr * before,
Context *,
RuleSubstituteMemory * &memory)
{
RelExpr *result;
MultiJoin * mjoin = (MultiJoin *) before;
if (memory == NULL)
{
// -----------------------------------------------------------------
// this is the first call, create info on all matches
// -----------------------------------------------------------------
// Make sure GroupAnalysis localJBBView is the same as mjoin's JBBSubset.
CMPASSERT(mjoin->getJBBSubset() ==
*(mjoin->getGroupAttr()->getGroupAnalysis()->getLocalJBBView()));
CollIndex numMatches = mjoin->getJBBSubset().getJBBSubsetAnalysis()->getMatchingMVs().entries();
if (numMatches > 0)
{
// allocate a new memory for multiple substitutes
memory = new (CmpCommon::statementHeap())
RuleSubstituteMemory(CmpCommon::statementHeap());
for (CollIndex matchIndex = 0; matchIndex < numMatches; matchIndex++)
{
// get the next match
MVMatchPtr match = mjoin->getJBBSubset().getJBBSubsetAnalysis()->getMatchingMVs()[matchIndex];
match->setAlreadyOptimized();
RelExpr *matchExpr = match->getMvRelExprTree();
// now set the group attributes of the result's top node
matchExpr->setGroupAttr(before->getGroupAttr());
// insert the match into the substitute memory
memory->insert(matchExpr);
} // for each match
} // if (numMatches > 0)
} // memory == NULL
// ---------------------------------------------------------------------
// handle case of multiple substitutes
// ---------------------------------------------------------------------
if (memory != NULL)
{
result = memory->getNextSubstitute();
if (result == NULL)
{
// returned all the substitutes
// now delete the substitute memory, so we won't be called again
delete memory;
memory = NULL;
}
else
{
// synth the MVI
result->synthLogProp();
}
// return the next retrieved substitute
return result;
}
else
return NULL; // rule didn't fire
}
// -----------------------------------------------------------------------
// methods for MVQRScanRule
// -----------------------------------------------------------------------
NABoolean MVQRScanRule::topMatch (RelExpr * expr,
Context * context)
{
// For optimization levels below the medium level we do not run the MVQRScanRule
if (CURRSTMT_OPTDEFAULTS->optLevel() < OptDefaults::MEDIUM)
return FALSE;
// rule is disabled
if (CmpCommon::getDefaultLong(MVQR_REWRITE_LEVEL) < 1)
return FALSE;
// the case of an aggregate query on a single table, the matching information
// is included part the JBBSubset
if ((expr->getOperator().match(REL_SCAN) &&
((Scan *) expr)->getJBBSubsetAnalysis() &&
((Scan *) expr)->getJBBSubsetAnalysis()->getMatchingMVs().entries()))
{
// DebugBreak();
return TRUE;
}
// for the case of a simple query on a single table, the matching information
// can be anchored at the SCAN operator
if ((expr->getOperator().match(REL_SCAN) &&
((Scan *) expr)->getMatchingMVs().entries()))
{
// DebugBreak();
return TRUE;
}
return FALSE;
}
RelExpr * MVQRScanRule::nextSubstitute(RelExpr * before,
Context *,
RuleSubstituteMemory * &memory)
{
RelExpr *result = NULL;
Scan * scan = (Scan *) before;
if (memory == NULL)
{
// -----------------------------------------------------------------
// this is the first call, create info on all matches
// -----------------------------------------------------------------
JBBSubsetAnalysis *jbbSubsetAnalysis = scan->getJBBSubsetAnalysis();
NAList<MVMatch*> matchingMVs(STMTHEAP);
// the case of an aggregate query on a single table
if (jbbSubsetAnalysis)
{
matchingMVs = jbbSubsetAnalysis->getMatchingMVs();
}
else // else it is a simple non-aggregate query on a single table
{
matchingMVs = scan->getMatchingMVs();
}
CollIndex numMatches = matchingMVs.entries();
if (numMatches > 0)
{
// allocate a new memory for multiple substitutes
memory = new (CmpCommon::statementHeap())
RuleSubstituteMemory(CmpCommon::statementHeap());
for (CollIndex matchIndex = 0; matchIndex < numMatches; matchIndex++)
{
// get the next match
MVMatchPtr match = matchingMVs[matchIndex];
RelExpr *matchExpr = match->getMvRelExprTree();
// now set the group attributes of the result's top node
matchExpr->setGroupAttr(before->getGroupAttr());
// insert the match into the substitute memory
memory->insert(matchExpr);
} // for each match
} // if (numMatches > 0)
} // memory == NULL
// ---------------------------------------------------------------------
// handle case of multiple substitutes
// ---------------------------------------------------------------------
if (memory != NULL)
{
result = memory->getNextSubstitute();
if (result == NULL)
{
// returned all the substitutes
// now delete the substitute memory, so we won't be called again
delete memory;
memory = NULL;
}
else
{
// synth the MVI
result->synthLogProp();
}
// return the next retrieved substitute
return result;
}
else
return NULL; // rule didn't fire
}
// -----------------------------------------------------------------------
// methods for MJEnumRule
// -----------------------------------------------------------------------
NABoolean MJEnumRule::topMatch (RelExpr * expr,
Context * context)
{
// For optimization levels below the medium level we do not run the MJEnumRule
if (CURRSTMT_OPTDEFAULTS->optLevel() < OptDefaults::MEDIUM)
return FALSE;
if (NOT expr->getOperator().match(REL_MULTI_JOIN))
return FALSE;
// if any selective LSR plan is forced (currently only star join) then we should
// not fire the MJEnumRule
if(CmpCommon::getDefault(DIMENSIONAL_QUERY_OPTIMIZATION) == DF_ON)
return FALSE;
// If this is a very complex query, we would rather not do MJEnumRule and
// give MJPrimeTableRule plans better chance to finish. We will take a complexity
// of a n-way join here where n is set by CURRSTMT_OPTDEFAULTS->getMJEnumLimit()
// We only do this for optimization level Medium.
Int32 base = CURRSTMT_OPTDEFAULTS->getMJEnumLimit() ;
if ((CURRSTMT_OPTDEFAULTS->optLevel() == OptDefaults::MEDIUM) AND
(CURRSTMT_OPTDEFAULTS->getQueryComplexity() > base * pow(2,base-1)) AND
(CmpCommon::getDefault(COMP_BOOL_2) == DF_OFF)) // make sure PTrule is ON
return FALSE;
MultiJoin* mjoin = (MultiJoin*) expr;
// The Enum rule fires only on the top MultiJoin in a JBB
// and following recursive calls which are marked by the scheduledLSRs set.
if (mjoin->getJBBSubset().getJBBCs() != mjoin->getJBBSubset().getJBB()->getJBBCs() AND
!mjoin->scheduledLSRs().contains(MJEnumRuleNumber))
return FALSE;
//get the StarBDRuleConfidence
Int32 starBDConfidence = mjoin->getLSRConfidence()->getStarBDRuleConfidence();
if((starBDConfidence != -1)&&
(starBDConfidence > ((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_15)))&&
(mjoin->getJBBSubset().getJBBCs().entries() > 4))
return FALSE;
return TRUE;
}
RelExpr * MJEnumRule::nextSubstitute(RelExpr * before,
Context *,
RuleSubstituteMemory * &memory)
{
RelExpr *result = NULL;
MultiJoin * mjoin = (MultiJoin *) before;
// Make sure GroupAnalysis localJBBView is the same as mjoin's JBBSubset.
CMPASSERT(mjoin->getJBBSubset() ==
*(mjoin->getGroupAttr()->getGroupAnalysis()->getLocalJBBView()));
if (memory == NULL)
{
// allocate a new memory for multiple substitutes
memory = new (CmpCommon::statementHeap())
RuleSubstituteMemory(CmpCommon::statementHeap());
const CANodeIdSet & jbbcs = mjoin->getJBBSubset().getJBBCs();
CMPASSERT (mjoin->getJBBSubset().getGB() == NULL_CA_ID)
Lng32 mySubgraphs = mjoin->getJBBSubset().numConnectedSubgraphs();
Int32 numSubstitutes = 0; // number of substitutes enumerated
// cardinality of the joins produced by this rule
CostScalar joinCardinality =
before->getGroupAttr()->getResultCardinalityForEmptyInput();
NABoolean doSortBasedOnDataFlow =
(CmpCommon::getDefault(COMP_BOOL_116) == DF_OFF);
// Data flow optimization
CostScalar childrenFlow = mjoin->getChildrenDataFlow();
const Lng32 numChildren = jbbcs.entries();
Lng32 childIter = -1; // temp iterator. *NOT* the same as child index
RelExpr** potentialSubstitutes = new (CmpCommon::statementHeap()) RelExpr*[numChildren];
CostScalar* substituteMetric = new (CmpCommon::statementHeap()) CostScalar[numChildren];
CANodeId * substituteRightChild = new (CmpCommon::statementHeap()) CANodeId[numChildren];
// inspectors ignore this part
CostScalar minSubstituteMetric = -1;
CostScalar* substituteMetric2 = new (CmpCommon::statementHeap()) CostScalar[numChildren];
CostScalar minSubstituteMetric2 = -1;
CostScalar nextMinSubstituteMetric2 = -1;
// inspectors ignore ends
// is there a multiplier present
NABoolean multiplierPresent = FALSE;
// is there are jbbc that provides an equalizing join
NABoolean guaranteedEqualizerPresent = FALSE;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJEnum Rule Application on " << mjoin->getJBBSubset().getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Level " << mjoin->getArity() << ": " << childrenFlow.value() << endl;
}
#endif
// allow cross product control if:
// 1) cross_product_control is active AND
// 2) multijoin has >1 base table OR multijoin arity > 5
NABoolean allowCrossProductControl =
(CURRSTMT_OPTDEFAULTS->isCrossProductControlEnabled()) AND
(mjoin->getGroupAttr()->getGroupAnalysis()->
getAllSubtreeTables().entries() > 1 OR numChildren > 5);
// enumerate the joins - Begin
CANodeId i;
for (i = jbbcs.init(); jbbcs.next(i); jbbcs.advance(i))
{
// pick this child to make right child of join
CANodeId jbbcRight = i;
JBBC * rightJBBC = i.getNodeAnalysis()->getJBBC();
Join * rightJBBCParent = rightJBBC->getOriginalParentJoin();
CANodeIdSet right(jbbcRight);
CANodeIdSet left(jbbcs);
left -= jbbcRight;
JBBSubset * leftSubset = left.jbbcsToJBBSubset();
if(leftSubset && !leftSubset->legal())
continue;
if(rightJBBCParent && rightJBBCParent->isRoutineJoin())
// && (jbbcs.entries()>2)
{
CANodeIdSet jbbcsProvidingInput = rightJBBC->getJBBCsProvidingInput();
CANodeIdSet rightNodes = right;
rightNodes += jbbcsProvidingInput;
rightNodes.intersectSet(jbbcs);
if((rightNodes.entries() != jbbcs.entries()) &&
jbbcsProvidingInput.entries())
{
jbbcsProvidingInput.intersectSet(jbbcs);
JBBSubset * inputSubset = jbbcsProvidingInput.jbbcsToJBBSubset();
CANodeIdSet otherSet = left;
otherSet -= jbbcsProvidingInput;
JBBSubset * otherJBBSubset = otherSet.jbbcsToJBBSubset();
CANodeIdSet jbbcsJoinedToOther = otherJBBSubset->getJoinedJBBCs();
CANodeIdSet jbbcsNeededByOther = otherJBBSubset->getPredecessorJBBCs();
CANodeIdSet jbbcsNeededByInput = inputSubset->getPredecessorJBBCs();
jbbcsJoinedToOther.intersectSet(jbbcsProvidingInput);
jbbcsNeededByOther.intersectSet(jbbcsProvidingInput);
jbbcsNeededByInput.intersectSet(otherSet);
if((!jbbcsJoinedToOther.entries()) &&
(!jbbcsNeededByOther.entries()) &&
(!jbbcsNeededByInput.entries()))
{
if(otherJBBSubset->legal() &&
rightNodes.jbbcsToJBBSubset()->legal())
{
right += jbbcsProvidingInput;
left -= jbbcsProvidingInput;
leftSubset = left.jbbcsToJBBSubset();
}
}
}
/*
if(jbbcsProvidingInput.entries() == 1)
{
CANodeId nodeProvidingInput = jbbcsProvidingInput.getFirst();
JBBC * jbbcProvidingInput =
nodeProvidingInput.getNodeAnalysis()->getJBBC();
CANodeIdSet jbbcsJoinedToInput = jbbcProvidingInput->getJoinedJBBCs();
if(jbbcsJoinedToInput.entries() < 1)
{
CANodeIdSet jbbcsNeededByLeft =
leftSubset->getPredecessorJBBCs();
if(!jbbcsNeededByLeft.contains(nodeProvidingInput))
right += nodeProvidingInput;
left -= nodeProvidingInput;
leftSubset = left.jbbcsToJBBSubset();
}
}
*/
}
JBBSubset * rightSubset = right.jbbcsToJBBSubset();
Lng32 newSubgraphs = leftSubset->numConnectedSubgraphs();
// xxx only counting joinPreds here
// do this only for inner-nonSemi-nonTSJ joins
if(((!rightJBBCParent) ||
rightJBBCParent->isInnerNonSemiNonTSJJoin()) &&
(rightSubset->joinPredsWithOther(*(leftSubset)).entries() == 0))
newSubgraphs++;
if ((newSubgraphs > mySubgraphs) && allowCrossProductControl)
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << " Unnecessary Cross Product when joining right child" << (CollIndex) i << endl;
}
#endif
continue;
}
if (jbbcRight.getNodeAnalysis()->getJBBC()->isFullOuterJoinOrTSJJBBC() &&
!(jbbcRight.getNodeAnalysis()->getJBBC()->getOriginalParentJoin()))
{
continue;
}
Join* substitute = mjoin->splitSubset(*(leftSubset),
*(rightSubset));
// this should not happen, but just in case
if (!substitute)
continue;
potentialSubstitutes[numSubstitutes] = substitute;
substitute->child(0)->synthLogProp();
// synthesize logical properties if right child is a multi join
if(rightSubset->getJBBCs().entries() > 1)
substitute->child(1)->synthLogProp();
// Schedule the enumeration rule on the right child if applicable
if (rightSubset->getJBBCs().entries() > 1)
((MultiJoin*)(substitute->getChild(1)))->scheduledLSRs() += MJEnumRuleNumber;
// Schedule the enumeration rule on the right child if applicable
if (leftSubset->getJBBCs().entries() > 1)
((MultiJoin*)(substitute->getChild(0)))->scheduledLSRs() += MJEnumRuleNumber;
UInt32 minRecordLength = (ActiveSchemaDB()->getDefaults())\
.getAsLong(COMP_INT_50);
CostScalar leftChildFlow = // Flow from left MultiJoin
substitute->child(0)->getGroupAttr()->getResultCardinalityForEmptyInput() *
MAXOF(substitute->child(0)->getGroupAttr()->getRecordLength(), minRecordLength);
substituteMetric[numSubstitutes] = childrenFlow + leftChildFlow;
substituteMetric2[numSubstitutes] = leftChildFlow;
substituteRightChild[numSubstitutes] = i;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Metric1 for Right Child " << (CollIndex) i <<
" is "<< substituteMetric[numSubstitutes].value() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Metric2 for Right Child " << (CollIndex) i <<
" is "<< substituteMetric2[numSubstitutes].value() << endl;
}
#endif
// Remembering the substitutes with min metric
if ((substituteMetric[numSubstitutes] < minSubstituteMetric) ||
(minSubstituteMetric == -1))
{
minSubstituteMetric = substituteMetric[numSubstitutes];
}
// inspectors ignore this part
// Remembering the substitutes with lowest two metrics
if ((substituteMetric2[numSubstitutes] < minSubstituteMetric2) ||
(minSubstituteMetric2 == -1))
{
nextMinSubstituteMetric2 = minSubstituteMetric2;
minSubstituteMetric2 = substituteMetric2[numSubstitutes];
}
else if ((substituteMetric2[numSubstitutes] < nextMinSubstituteMetric2) ||
(nextMinSubstituteMetric2 == -1))
{
nextMinSubstituteMetric2 = substituteMetric2[numSubstitutes];
}
// inspectors ignore ends
if(rightJBBC->isGuaranteedEqualizer())
guaranteedEqualizerPresent = TRUE;
// get substitute left child card
CostScalar leftCard =
potentialSubstitutes[numSubstitutes]->
child(0)->getGroupAttr()->
getResultCardinalityForEmptyInput();
if((leftCard < joinCardinality) &&
(rightJBBC->getJoinedJBBCs().entries()==1) &&
(jbbcs.contains(rightJBBC->getJoinedJBBCs())))
multiplierPresent = TRUE;
numSubstitutes++;
}
// enumerate the joins - End
// dataflow based pruning - Begin
// Data Flow Optimization Fudge Factors
CostScalar DATA_FLOW_FACTOR_1
((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_7));
const CostScalar DATA_FLOW_FACTOR_2(1000);
Int32 numPrunedSubstitutes = 0;
// Variables used to enumerate only one substitute for multipliers.
// Only one substitute with an multiplier is enumerated. The substitute
// that produces the lowest data flow is chosen. Currently multipliers
// connected to only a single tables are considered.
// index of the equalizer with the lowest dataflow
CollIndex multiplierWithLowestDFIdx = -1;
// lowest dataflow of the substitute using an equalizer
CostScalar multiplierLowestDF = -1;
// Variables used to enumerate only one substitute for equalizers.
// Only one substitute with an equalizer is enumerated. The substitute
// that produces the lowest data flow is chosen. Currently children
// connected via a left join on a unique column are considered equalizers,
// since they will neither increase nor decrease the # or rows coming in
// from the left side of the join.
// index of the equalizer with the lowest dataflow
CollIndex equalizerWithLowestDFIdx = -1;
// lowest dataflow of the substitute using an equalizer
CostScalar equalizerLowestDF = -1;
for (childIter = 0;
childIter < numSubstitutes;
childIter++)
{
RelExpr* substitute = potentialSubstitutes[childIter];
if (!substitute)
{
continue;
}
// get the right child JBBC
CANodeId jbbcRight = substituteRightChild[childIter];
JBBC * rightJBBC = jbbcRight.getNodeAnalysis()->getJBBC();
// get left child card
CostScalar lCard =
potentialSubstitutes[childIter]->
child(0)->getGroupAttr()->
getResultCardinalityForEmptyInput();
if((mjoin->getArity() > 2) &&
multiplierPresent &&
((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_79) > 1))
{
if((lCard < joinCardinality)&&
(rightJBBC->getJoinedJBBCs().entries()==1) &&
(jbbcs.contains(rightJBBC->getJoinedJBBCs())))
{
CollIndex multiplierIdx = childIter;
CostScalar multiplierDF = substituteMetric[childIter];
// first equalizer, don't prune
if(multiplierWithLowestDFIdx == -1)
{
multiplierWithLowestDFIdx = multiplierIdx;
multiplierLowestDF = multiplierDF;
}
else{
// prune the equalizer with a worse dataflow
if(multiplierDF < multiplierLowestDF)
{
potentialSubstitutes[multiplierWithLowestDFIdx] = NULL;
multiplierWithLowestDFIdx = multiplierIdx;
multiplierLowestDF = multiplierDF;
}
else{
potentialSubstitutes[multiplierIdx] = NULL;
}
numPrunedSubstitutes++;
}
}
else{
potentialSubstitutes[childIter] = NULL;
numPrunedSubstitutes++;
}
continue;
}
if((mjoin->getArity() > 2) &&
guaranteedEqualizerPresent &&
((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_79) > 1))
{
if(rightJBBC->isGuaranteedEqualizer())
{
CollIndex equalizerIdx = childIter;
CostScalar equalizerDF = substituteMetric[childIter];
// first equalizer, don't prune
if(equalizerWithLowestDFIdx == -1)
{
equalizerWithLowestDFIdx = equalizerIdx;
equalizerLowestDF = equalizerDF;
}
else{
// prune the equalizer with a worse dataflow
if(equalizerDF < equalizerLowestDF)
{
potentialSubstitutes[equalizerWithLowestDFIdx] = NULL;
equalizerWithLowestDFIdx = equalizerIdx;
equalizerLowestDF = equalizerDF;
}
else{
potentialSubstitutes[equalizerIdx] = NULL;
}
numPrunedSubstitutes++;
}
}
else{
potentialSubstitutes[childIter] = NULL;
numPrunedSubstitutes++;
}
continue;
}
if ((mjoin->getArity() > 2) &&
(CURRSTMT_OPTDEFAULTS->optimizerHeuristic5()) && // Data Flow Optimization
(substituteMetric[childIter] > DATA_FLOW_FACTOR_1 * minSubstituteMetric + DATA_FLOW_FACTOR_2))
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "For right child "
<< (CollIndex) substituteRightChild[childIter]
<< " " << substituteMetric[childIter].value()
<< " >> "<< minSubstituteMetric.value() << endl;
}
#endif
potentialSubstitutes[childIter] = NULL;
numPrunedSubstitutes++;
continue;
}
// The if condition below is unneccesary
// Aggressive Data Flow Heuristic.
if ((((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_51) >= 2) &&
(substituteMetric2[childIter] > nextMinSubstituteMetric2)) ||
(((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_51) == 1) &&
(substituteMetric2[childIter] > minSubstituteMetric2)))
{
if (mjoin->getArity() > 2)
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "For right child "
<< (CollIndex) substituteRightChild[childIter]
<< " " << substituteMetric2[childIter].value()
<< " ---- "<< nextMinSubstituteMetric2.value()
<< " ---- "<< minSubstituteMetric2.value() << endl;
}
#endif
potentialSubstitutes[childIter] = NULL;
numPrunedSubstitutes++;
continue;
}
}
// prune substitutes that involve an equalizer
if ((mjoin->getArity() > 2) &&
rightJBBC->isGuaranteedEqualizer() &&
((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_79) > 0))
{
CollIndex equalizerIdx = childIter;
CostScalar equalizerDF = substituteMetric[childIter];
// first equalizer, don't prune
if(equalizerWithLowestDFIdx == -1)
{
equalizerWithLowestDFIdx = equalizerIdx;
equalizerLowestDF = equalizerDF;
}
else{
// prune the equalizer with a worse dataflow
if(equalizerDF < equalizerLowestDF)
{
potentialSubstitutes[equalizerWithLowestDFIdx] = NULL;
equalizerWithLowestDFIdx = equalizerIdx;
equalizerLowestDF = equalizerDF;
}
else{
potentialSubstitutes[equalizerIdx] = NULL;
}
numPrunedSubstitutes++;
continue;
}
}
}
// dataflow based pruning - End
// Sort based on dataflow - Begin
// The pruning above could have left 'holes' in the potentialSubstitutes
// array, the sorting below will 'compact' the array.
// following four arrays are used to sort the substitutes by
// substituteMetric
RelExpr** sortedPotentialSubstitutes =
new (CmpCommon::statementHeap()) RelExpr*[numSubstitutes];
CostScalar* sortedSubstituteMetric =
new (CmpCommon::statementHeap()) CostScalar[numSubstitutes];
CostScalar* sortedSubstituteMetric2 =
new (CmpCommon::statementHeap()) CostScalar[numSubstitutes];
CANodeId* sortedSubstituteRightChild =
new (CmpCommon::statementHeap()) CANodeId[numSubstitutes];
for (childIter = 0; childIter < numSubstitutes; childIter++)
{
RelExpr * largestChild = NULL;
Int32 largestChildLoc = -1;
CostScalar largestChildMetric = -1;
CANodeId largestChildId = NULL_CA_ID;
for (Int32 childIter2 = 0; childIter2 < numSubstitutes; childIter2++)
{
RelExpr* substitute = potentialSubstitutes[childIter2];
CANodeId childId = substituteRightChild[childIter2];
if (!substitute)
{
continue;
}
CostScalar currentChildMetric = substituteMetric[childIter2];
if (currentChildMetric > largestChildMetric)
{
largestChild = substitute;
largestChildLoc = childIter2;
largestChildMetric = substituteMetric[childIter2];
largestChildId = childId;
}
}
if(largestChild)
{
potentialSubstitutes[largestChildLoc]=NULL;
sortedSubstituteMetric[childIter]= substituteMetric[largestChildLoc];
sortedSubstituteMetric2[childIter]= substituteMetric2[largestChildLoc];
substituteRightChild[childIter]=NULL_CA_ID;
}
sortedPotentialSubstitutes[childIter]=largestChild;
sortedSubstituteRightChild[childIter]=largestChildId;
}
// following four arrays are used during insertion into memo
RelExpr** resultPotentialSubstitutes = sortedPotentialSubstitutes;
CostScalar* resultSubstituteMetric = sortedSubstituteMetric;
CostScalar* resultSubstituteMetric2 = sortedSubstituteMetric2;
CANodeId* resultSubstituteRightChild = sortedSubstituteRightChild;
// Sort based on dataflow - End
// Insert into substitute memory - Begin
// number of substitute remaining after dataflow based pruning
numSubstitutes-=numPrunedSubstitutes;
// Now we insert the winning potential Substitutes
Int32 numGeneratedSubstitutes = 0;
Int32 maxSubstitutes = numSubstitutes;
maxSubstitutes = (ActiveSchemaDB()->getDefaults()).\
getAsLong(COMP_INT_51);
if ((maxSubstitutes <= 0) ||
(maxSubstitutes > numSubstitutes) ||
(numChildren == 2))
maxSubstitutes = numSubstitutes;
Int32 potential = maxSubstitutes - 1;
// If this is the top multijoin
if (before->getGroupAttr()->getPotential() < 0)
{
before->getGroupAttr()->updatePotential(0);
}
Int32 groupPotential = before->getGroupAttr()->getPotential();
Int32 combinedPotentialThreshold =
CURRSTMT_OPTDEFAULTS->getEnumPotentialThreshold();
for (childIter = (numSubstitutes - maxSubstitutes);
childIter < numSubstitutes;
childIter++)
{
RelExpr* substitute = resultPotentialSubstitutes[childIter];
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "For right child "
<< (CollIndex) resultSubstituteRightChild[childIter]
<< " " << resultSubstituteMetric[childIter].value()
<< " ~~ "<< minSubstituteMetric.value() << endl;
}
#endif
Int32 combinedPotential = groupPotential + potential;
substitute->updatePotential(potential);
if(substitute->child(0)->getOperatorType()==REL_MULTI_JOIN)
{
substitute->child(0)->getGroupAttr()
->updatePotential(combinedPotential);
}
else
{
// consider every alternative plan for the left most join of a JBB.
combinedPotentialThreshold =
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_24);
}
if(combinedPotential <= combinedPotentialThreshold)
{
memory->insert(substitute);
numGeneratedSubstitutes++;
}
potential--;
}
//"potentialSubstitutes" will be freed by statementHeap
//"<temporary>" will be freed by statementHeap
//"substituteMetric" will be freed by statementHeap
//"substituteMetric2" will be freed by statementHeap
//coverity[leaked_storage]
// Insert into substitute memory - Begin
}
if (memory != NULL)
{
result = memory->getNextSubstitute();
if (result == NULL)
{
// returned all the substitutes
// now delete the substitute memory, so we won't be called again
delete memory;
memory = NULL;
return result;
}
result->synthLogProp();
//result->child(0)->synthLogProp(); already called above while preparing
// substitutes
result->contextInsensRules() += GlobalRuleSet->transformationRules();
result->contextInsensRules() -= JoinToTSJRuleNumber;
result->contextInsensRules() -= RoutineJoinToTSJRuleNumber;
// Also allow join commutativity if arity is > 2
if (mjoin->getArity() > 2)
result->contextInsensRules() -= JoinCommutativityRuleNumber;
// return the next retrieved substitute
return result;
}
return result;
}
// -----------------------------------------------------------------------
// methods for MJStarJoinIRule
// -----------------------------------------------------------------------
NABoolean MJStarJoinIRule::topMatch (RelExpr * expr,
Context * context)
{
if (CmpCommon::getDefault(COMP_BOOL_115) == DF_ON)
return topMatch_Old(expr, context);
// COMP_BOOL_2 determines if LargeScope rules are tried
if (CmpCommon::getDefault(COMP_BOOL_2) == DF_ON)
return FALSE;
// MJ rules should only fire on a MultiJoin
if (NOT expr->getOperator().match(REL_MULTI_JOIN))
return FALSE;
// Get a handle to the MultiJoin on which we are performing the topMatch()
MultiJoin* mjoin = (MultiJoin*) expr;
// The StarJoin Type-I rule fires only on the top MultiJoin in a JBB
// and following recursive calls which are marked by the scheduledLSRs set.
// This scheduledLSRs is set by the StarJoin type-I Rule itself when it produces
// a substitute. If the substitute i.e. the plan resulting from the application
// of the StarJoin Type-I rule has children that are MultiJoins, the rule schedules
// the application of the StarJoin Type-I rule on those children via the
// scheduledLSRs set.
if((mjoin->getJBBSubset().getJBBCs() != mjoin->getJBBSubset().getJBB()->getJBBCs()) &&
(!mjoin->scheduledLSRs().contains(MJStarJoinIRuleNumber)))
return FALSE;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_TopMatch_Begin" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinI Rule TopMatch on " << mjoin->getJBBSubset().getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Superset is : " << mjoin->getJBBSubset().getJBB()->getMainJBBSubset().getText() << endl;
}
#endif //_DEBUG
// get a handle to the JBBSubset Analysis for the JBBSubset
// represented by this MultiJoin (i.e. mjoin)
JBBSubsetAnalysis * mjoinAnalysis = mjoin->
getJBBSubset().
getJBBSubsetAnalysis();
mjoinAnalysis->analyzeInitialPlan();
if (mjoinAnalysis->starJoinTypeIFeasible())
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule is feasible" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_TopMatch_End" <<endl;
}
#endif //_DEBUG
return TRUE;
}
else {
mjoin->scheduledLSRs() += MJStarJoinIIRuleNumber;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule is not feasible" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Scheduled MJStarJoinIIRule" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_TopMatch_End" <<endl;
}
#endif //_DEBUG
}
return FALSE;
}
// the old topMatch method of MJStarJoinIRule,
// not used anymore this code is OFF by default
NABoolean MJStarJoinIRule::topMatch_Old (RelExpr * expr,
Context * context)
{
// COMP_BOOL_2 determines if LargeScope rules are tried
if (CmpCommon::getDefault(COMP_BOOL_2) == DF_ON)
return FALSE;
// MJ rules should only fire on a MultiJoin
if (NOT expr->getOperator().match(REL_MULTI_JOIN))
return FALSE;
// Get a handle to the MultiJoin on which we are performing the topMatch()
MultiJoin* mjoin = (MultiJoin*) expr;
// The StarJoin Type-I rule fires only on the top MultiJoin in a JBB
// and following recursive calls which are marked by the scheduledLSRs set.
// This scheduledLSRs is set by the StarJoin type-I Rule itself when it produces
// a substitute. If the substitute i.e. the plan resulting from the application
// of the StarJoin Type-I rule has children that are MultiJoins, the rule schedules
// the application of the StarJoin Type-I rule on those children via the
// scheduledLSRs set.
if((mjoin->getJBBSubset().getJBBCs() != mjoin->getJBBSubset().getJBB()->getJBBCs()) &&
(!mjoin->scheduledLSRs().contains(MJStarJoinIRuleNumber)))
return FALSE;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_TopMatch_Begin" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinI Rule TopMatch on " << mjoin->getJBBSubset().getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Superset is : " << mjoin->getJBBSubset().getJBB()->getMainJBBSubset().getText() << endl;
}
#endif //_DEBUG
// create a new work area
// we create a work area to save work done during the topMatch(),
// so that the results can be re-used while producing the nextSubstitute()
MJStarJoinIRuleWA * mjStarJoinIRuleWA = new MJStarJoinIRuleWA();
// get a handle to the JBBSubset Analysis for the JBBSubset
// represented by this MultiJoin (i.e. mjoin)
JBBSubsetAnalysis * mjoinAnalysis = mjoin->
getJBBSubset().
getJBBSubsetAnalysis();
// assert if there is already a MJStarJoinIRuleWA associated with this mjoin
CMPASSERT(!mjoinAnalysis->getMJStarJoinIRuleWA());
// get a handle to the MJRulesWA
// This work area is used to store information that we compute
// so that other interest MJRules can use it.
MJRulesWA * mjRulesWA = mjoinAnalysis->getMJRulesWA();
// set the StarJoin rule work area for this MultiJoin
mjoinAnalysis->setMJStarJoinIRuleWA(mjStarJoinIRuleWA);
// Cardinality of fact table after application of local predicates on prefix
// of clustering key
CostScalar factTableCKPrefixCardinality;
CANodeId largestTable = NULL_CA_ID;
// determine if there is a clear fact table
CANodeId factTable = findFactTable(mjoin,
factTableCKPrefixCardinality,
largestTable);
mjRulesWA->factTable_ = factTable;
mjRulesWA->largestTable_ = largestTable;
// if we did not find a fact table then look for the center table
if (factTable == NULL_CA_ID)
{
CANodeId centerTable = mjRulesWA->computeCenterTable();
CostScalar centerTableSize = mjRulesWA->centerTableDataScanned_;
CostScalar centerTableSizePerPartition =
mjRulesWA->centerTableDataPerPartition_;
CostScalar centerTablePartitions = mjRulesWA->centerTablePartitions_;
if(centerTable != NULL_CA_ID)
{
JBBC * centerTableJBBC = centerTable.getNodeAnalysis()->getJBBC();
if(!(centerTableJBBC->getPredecessorJBBCs().entries()))
{
if(centerTable == largestTable)
factTable = centerTable;
else
{
if((centerTableSizePerPartition >
((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_16)*1024))||
(centerTablePartitions >
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_17)))
{
factTable = centerTable;
}
}
}
}
// if we did not find a center table
if (factTable == NULL_CA_ID)
{
mjoin->scheduledLSRs() += MJStarJoinIIRuleNumber;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule is not feasible" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_TopMatch_End" <<endl;
}
#endif //_DEBUG
// return FALSE, indicating this rule is not a good match for the current
// query
return FALSE;
}
}
// set the factTable in the work area
mjStarJoinIRuleWA->factTable_ = factTable;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "FactTable: " << factTable.getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << endl;
}
#endif
// check to see if the MultiJoin matches a star pattern
if (!isAStarPattern(mjoin, factTable, factTableCKPrefixCardinality))
{
// if not, then schedule other LSRs
mjoin->scheduledLSRs() += MJStarJoinIIRuleNumber;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule is not feasible" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_TopMatch_End" <<endl;
}
#endif //_DEBUG
// return FALSE, indicating this rule is not a good match for the current
// query
return FALSE;
}
// matched the Star Schema Pattern, set flag to indicate that in the work area
mjStarJoinIRuleWA->matchedStarSchema_ = TRUE;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule is feasible" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_TopMatch_End" <<endl;
}
#endif //_DEBUG
return TRUE;
}
RelExpr * MJStarJoinIRule::nextSubstitute(RelExpr * before,
Context * context,
RuleSubstituteMemory * &memory)
{
if (CmpCommon::getDefault(COMP_BOOL_115) == DF_ON)
return nextSubstitute_Old(before, context, memory);
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_nextSubstitute_Begin" <<endl;
}
#endif //_DEBUG
MultiJoin * mjoin = (MultiJoin *) before;
// Make sure GroupAnalysis localJBBView is the same as mjoin's JBBSubset.
CMPASSERT(mjoin->getJBBSubset() ==
*(mjoin->getGroupAttr()->getGroupAnalysis()->getLocalJBBView()));
// get a handle to the JBBSubset Analysis for the JBBSubset
// represented by this MultiJoin (i.e. mjoin)
JBBSubsetAnalysis * mjoinAnalysis = mjoin->
getJBBSubset().
getJBBSubsetAnalysis();
const NAList<CANodeIdSet> * const leftDeepJoinSequence =
mjoinAnalysis->getInitialPlanLeftDeepJoinSequence();
UInt32 numEntriesInLeftDeepJoinSequence =
(*leftDeepJoinSequence).entries();
// setup the join directives for the left deep join sequence
NAList<MJJoinDirective *> joinDirectives(CmpCommon::statementHeap(),
numEntriesInLeftDeepJoinSequence-1);
NABoolean encounteredFactTable = FALSE;
UInt32 i = 0;
CANodeId factTable = mjoinAnalysis->getFactTable();
for(i=0;
i < (numEntriesInLeftDeepJoinSequence-1);
i++)
{
MJJoinDirective * joinDirective = new MJJoinDirective(CmpCommon::statementHeap());
CANodeIdSet joinRightChild = (*leftDeepJoinSequence)[i];
JBBC * joinRightChildJBBC = NULL;
if(joinRightChild.entries() == 1)
joinRightChildJBBC = joinRightChild.getFirst().getNodeAnalysis()->getJBBC();
// setup unconditional join directives
joinDirective->setSkipJoinLeftShift();
// COMP_BOOL_7 is OFF my default therefore
// this is essentially unconditional
if (CmpCommon::getDefault(COMP_BOOL_7) == DF_OFF)
joinDirective->setJoinFromPTRule();
if(joinRightChild == factTable)
{
encounteredFactTable = TRUE;
if (CmpCommon::getDefault(COMP_BOOL_71) == DF_OFF)
joinDirective->setSkipHashJoin();
joinDirective->setSkipMergeJoin();
joinDirective->setJoinSource(Join::STAR_FACT);
}
else if (encounteredFactTable)
{
if(CmpCommon::getDefault(COMP_BOOL_100) == DF_OFF)
joinDirective->setSkipMergeJoin();
joinDirective->setJoinSource(Join::STAR_KEY_JOIN);
}
else {
joinDirective->setJoinSource(Join::STAR_FILTER_JOIN);
if ((CmpCommon::getDefault(COMP_BOOL_85) == DF_OFF) &&
joinRightChildJBBC &&
!(joinRightChildJBBC->getOriginalParentJoin() &&
joinRightChildJBBC->getOriginalParentJoin()->isRoutineJoin()))
joinDirective->setSkipNestedJoin();
}
// Turn OFF join commutativity if either is TRUE:
// 1. If this is the NJ into the fact table
// 2. If the join is before the NJ into the fact table and COMP_BOOL_5 = OFF
// 3. If COMP_BOOL_3 = ON
if (((joinRightChild == factTable) && encounteredFactTable) ||
(encounteredFactTable && (CmpCommon::getDefault(COMP_BOOL_5) == DF_OFF)) ||
(CmpCommon::getDefault(COMP_BOOL_3) == DF_ON))
{
joinDirective->setSkipJoinCommutativity();
}
if(joinRightChild.entries() > 1)
{
joinDirective->scheduleLSROnRightChild(MJStarJoinIRuleNumber);
}
// any special handling of the last join (i.e. last from the top)
if(i == numEntriesInLeftDeepJoinSequence - 2)
{
CANodeIdSet joinLeftChild = (*leftDeepJoinSequence)[i+1];
if(joinLeftChild.entries() > 1)
{
joinDirective->scheduleLSROnLeftChild(MJStarJoinIRuleNumber);
}
}
joinDirectives.insert(joinDirective);
}
RelExpr * result = mjoin->createLeftLinearJoinTree(leftDeepJoinSequence,
&joinDirectives);
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_nextSubstitute_End" <<endl;
}
#endif //_DEBUG
return result;
}
// This is old code, most of this work in now done during
// analysis. This functionality was moved to the analysis
// phase as part of Adaptive Segmentation. Since we need
// to get an early heuristic plan to determine a reasonable
// degree of parallelism
RelExpr * MJStarJoinIRule::nextSubstitute_Old(RelExpr * before,
Context * context,
RuleSubstituteMemory * &memory)
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_nextSubstitute_Begin" <<endl;
}
#endif //_DEBUG
MultiJoin * mjoin = (MultiJoin *) before;
// get a handle to the JBBSubset Analysis for the JBBSubset
// represented by this MultiJoin (i.e. mjoin)
JBBSubsetAnalysis * mjoinAnalysis = mjoin->
getJBBSubset().
getJBBSubsetAnalysis();
// Make sure GroupAnalysis localJBBView is the same as mjoin's JBBSubset.
CMPASSERT(mjoin->getJBBSubset() ==
*(mjoin->getGroupAttr()->getGroupAnalysis()->getLocalJBBView()));
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
MJStarJoinIRuleWA * mjStarJoinIRuleWA = mjoin->
getJBBSubset().
getJBBSubsetAnalysis()->
getMJStarJoinIRuleWA();
// create the plan
// By this time the topMatch should have already done the following:
// 1. figured out the fact table - FT
// 2. matched the star schema pattern
// 3. computed the list of edges to be joined before the fact table - BFT
// 4. the list of tables that should be join after the fact table - AFT
//
// The BFT will be joined in the order the edges are found in
// mjStarJoinIRuleWA->listOfEdges
//
// Following is how the join tree should look like
//
// Join
// / \
// / AFT
// /
// Nested_Join
// / \
// BFT FT
//
// In this method we need to figure out the ordering of the tables in the AFT
// and then produce a plan
CANodeId factTable = mjStarJoinIRuleWA->factTable_;
// begin - try to figure out the ordering of the tables in AFT
// get the set of tables (BFT + FT).
// we already know the join ordering for this set
CANodeIdSet factTableSet = mjStarJoinIRuleWA->nodesJoinedBeforeFactTable_;
factTableSet.insert(factTable);
// get the set of table (AFT).
// we don't know the join ordering of these tables except the fact that
// they should be join after the fact table
CANodeIdSet availableNodes = mjStarJoinIRuleWA->availableNodes_;
#ifdef _DEBUG
//print the right child of the current join
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Table to be joined after fact table "\
<< availableNodes.getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Fact table "\
<< factTable.getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Table to be joined before fact table "\
<< mjStarJoinIRuleWA->nodesJoinedBeforeFactTable_.getText() << endl;
}
#endif
// this list should contain the available nodes in the order
// they should be joined
NAList<CANodeIdSet> availableNodesOrdered(CmpCommon::statementHeap(),
availableNodes.entries());
while (availableNodes.entries())
{
// Node that produces the fewest rows when joined to the factTableSet
CANodeId smallestNode = NULL_CA_ID;
CostScalar lowestCardinality = 0;
for( CANodeId availableNode = availableNodes.init();
availableNodes.next(availableNode);
availableNodes.advance(availableNode))
{
JBBC * availableNodeJBBC =
availableNode.getNodeAnalysis()->getJBBC();
CANodeIdSet jBBCsAvailableNodeDependsOn =
availableNodeJBBC->getPredecessorJBBCs();
// make sure all JBBCs availableNode depends
// on come before the available node
if(!factTableSet.contains(jBBCsAvailableNodeDependsOn))
continue;
//get cardinality of joining factTableset to availableNode
//to nonAvailableNodes
CostScalar joinCardinality =
appStatMan->
joinJBBChildren(factTableSet, availableNode)->
getResultCardinality();
if((smallestNode == NULL_CA_ID) ||
(joinCardinality < lowestCardinality))
{
lowestCardinality = joinCardinality;
smallestNode = availableNode;
}
}
// remove the smallest node from the available nodes
availableNodes.remove(smallestNode);
// add the smallest node to ordered list of available nodes
availableNodesOrdered.insert(smallestNode);
// update the factTableSet for the next iteration
factTableSet.insert(smallestNode);
}
// end - try to figure out the ordering of the tables in AFT
// begin - construct left deep join sequence
// The list CANodeIdSets representing the join
// sequence produced by this rule
// Element 0: is inner most / top most element in left deep join sequence
// Element 1: is left most/outer most element in left deep join sequence
NAList<CANodeIdSet> leftDeepJoinSequence (CmpCommon::statementHeap(),
mjoin->getJBBSubset().getJBBCs().entries());
UInt32 i = 0;
// first insert tables from AFT
for(i = availableNodesOrdered.entries(); i > 0; i--)
{
leftDeepJoinSequence.insert(availableNodesOrdered[(i-1)]);
}
// insert the fact table
leftDeepJoinSequence.insert(factTable);
NAList<CANodeIdSet> * listOfEdges = mjStarJoinIRuleWA->listOfEdges_;
// insert the tables from the BFT
// tables in the BFT are organized as a list of edges
for(i = mjStarJoinIRuleWA->optimalFTLocation_; i > 0; i--)
{
leftDeepJoinSequence.insert((*listOfEdges)[(i-1)]);
}
// Consolidate the fringes
mjoinAnalysis->consolidateFringes(factTable, leftDeepJoinSequence);
// end - construct left deep join sequence
// setup the join directives for the left deep join sequence
NAList<MJJoinDirective *> joinDirectives(CmpCommon::statementHeap(),leftDeepJoinSequence.entries()-1);
UInt32 numEntriesInLeftDeepJoinSequence = leftDeepJoinSequence.entries();
NABoolean encounteredFactTable = FALSE;
for(i=0;
i < (numEntriesInLeftDeepJoinSequence-1);
i++)
{
MJJoinDirective * joinDirective = new MJJoinDirective(CmpCommon::statementHeap());
CANodeIdSet joinRightChild = leftDeepJoinSequence[i];
JBBC * joinRightChildJBBC = NULL;
if(joinRightChild.entries() == 1)
joinRightChildJBBC = joinRightChild.getFirst().getNodeAnalysis()->getJBBC();
// setup unconditional join directives
joinDirective->setSkipJoinLeftShift();
// COMP_BOOL_7 is OFF my default therefore
// this is essentially unconditional
if (CmpCommon::getDefault(COMP_BOOL_7) == DF_OFF)
joinDirective->setJoinFromPTRule();
if(joinRightChild == factTable)
{
encounteredFactTable = TRUE;
if (CmpCommon::getDefault(COMP_BOOL_71) == DF_OFF)
joinDirective->setSkipHashJoin();
joinDirective->setSkipMergeJoin();
joinDirective->setJoinSource(Join::STAR_FACT);
}
else if (encounteredFactTable)
{
if(CmpCommon::getDefault(COMP_BOOL_100) == DF_OFF)
joinDirective->setSkipMergeJoin();
joinDirective->setJoinSource(Join::STAR_KEY_JOIN);
}
else {
joinDirective->setJoinSource(Join::STAR_FILTER_JOIN);
if ((CmpCommon::getDefault(COMP_BOOL_85) == DF_OFF) &&
joinRightChildJBBC &&
!(joinRightChildJBBC->getOriginalParentJoin() &&
joinRightChildJBBC->getOriginalParentJoin()->isRoutineJoin()))
joinDirective->setSkipNestedJoin();
}
if ((encounteredFactTable)||
(CmpCommon::getDefault(COMP_BOOL_3) == DF_ON))
{
joinDirective->setSkipJoinCommutativity();
}
if(joinRightChild.entries() > 1)
{
joinDirective->scheduleLSROnRightChild(MJStarJoinIRuleNumber);
}
// any special handling of the last join (i.e. last from the top)
if(i == numEntriesInLeftDeepJoinSequence - 2)
{
CANodeIdSet joinLeftChild = leftDeepJoinSequence[i+1];
if(joinLeftChild.entries() > 1)
{
joinDirective->scheduleLSROnLeftChild(MJStarJoinIRuleNumber);
}
}
joinDirectives.insert(joinDirective);
}
RelExpr * result = mjoin->createLeftLinearJoinTree(&leftDeepJoinSequence,
&joinDirectives);
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_nextSubstitute_End" <<endl;
}
#endif //_DEBUG
return result;
}
CANodeId MJStarJoinIRule::isAStarShape(MultiJoin * mjoin)
{
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = mjoin->getJBBSubset().getJBBCs();
UInt32 n = childSet.entries();
//For a multijoin to be a star shape given n children
//(n-1) children should be connect to one other child
//where as 1 child will be connected to n-1 children
CANodeId center = NULL_CA_ID;
// require atleast 3 tables
if (n < 3)
return NULL_CA_ID;
for(CANodeId currentTable = childSet.init();
childSet.next(currentTable);
childSet.advance(currentTable))
{
UInt32 numConnectedChildren =
currentTable.getNodeAnalysis()->getJBBC()->getJoinedJBBCs().entries();
if ((center == NULL_CA_ID) && (numConnectedChildren == (n-1)))
center = currentTable;
else{
if ( numConnectedChildren != 1)
{
return NULL_CA_ID;
}
}
}
return center;
}
// This method, given a fact table, matches a multi-join to a star pattern
// This method is called by the MJStarJoinIRule::topMatch()
NABoolean MJStarJoinIRule::isAStarPattern(MultiJoin * mjoin, CANodeId factTable, CostScalar factTableCKPrefixCardinality)
{
if (CmpCommon::getDefault(COMP_BOOL_88) == DF_ON)
return FALSE;
if (NOT CURRSTMT_OPTDEFAULTS->isNestedJoinConsidered())
return FALSE;
// This method is called while doing the topMatch on a multi-join to
// determine if it matches a star schema pattern.11
// Get a handle to the work area
// We need a work area to save work done while doing the
// topMatch, since this work is re-used in the nextSubstitute.
// This saves us from doing this heavy work again.
MJStarJoinIRuleWA * mjStarJoinIRuleWA = mjoin->
getJBBSubset().
getJBBSubsetAnalysis()->
getMJStarJoinIRuleWA();
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_isAStarPattern_Begin" <<endl;
}
#endif //_DEBUG
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = mjoin->getJBBSubset().getJBBCs();
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
//all nodes in this multi-join except the fact table
CANodeIdSet nodesExcludingFactTable = childSet;
nodesExcludingFactTable.remove(factTable);
//temp vars used for iteration
ULng32 i=0;
CANodeId currentNode;
CANodeIdSet currentNodeSet;
CostScalar currentCardinality;
// get the index_desc for the facttable primary index
const IndexDesc * factTableIndexDesc =
factTable.getNodeAnalysis()
->getTableAnalysis()
->getTableDesc()
->getClusteringIndex();
const PartitioningFunction * factTablePartFunc =
factTableIndexDesc
->getPartitioningFunction();
ValueIdSet factTablePartKey;
Int32 factTableNumPartitions = 1;
NABoolean factIsHashPartitioned = FALSE;
if (factTablePartFunc)
{
factTablePartKey =
factTablePartFunc->getPartitioningKey();
factTableNumPartitions =
factTablePartFunc->getCountOfPartitions();
factIsHashPartitioned =
factTablePartFunc->isATableHashPartitioningFunction();
}
//for further processing we'll need the clustering key of the fact table
ValueIdList factTableCK =
factTableIndexDesc->getIndexKey();
// Variables used during iteration
ValueId keyColumn;
CANodeIdSet connectedTables;
CANodeIdSet tablesConnectedToColumn;
CANodeIdSet tablesConnectedViaThisColumn;
Lng32 numPrefixColsCoveredFromFactTableCK = 0;
// this variable is used to get the list of
// fact table clustering key prefix columns
// that are covered by join or local preds
ValueIdList coveredFactTableCKPrefix;
// temp vars to pass into getJBBCsConnectedToPrefixOfList()
ValueIdSet dummyForJoinPreds;
ValueIdSet dummyForLocalPreds;
// get:
// 1. Tables connected to Fact Table via join predicates on clustering key prefix
// 2. Fact Table clustering key prefix columns that have a predicate on them
// 3. Number of fact table clustering key prefix columns covered
connectedTables = factTable.getNodeAnalysis()->
getTableAnalysis()->
getJBBCsConnectedToPrefixOfList(
nodesExcludingFactTable,
factTableCK,
numPrefixColsCoveredFromFactTableCK,
dummyForJoinPreds,
dummyForLocalPreds);
// if a prefix of the fact table clustering key is not covered
// by a join predicate, then return FALSE
if((!connectedTables.entries()))
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Pattern Not Matched, there is no join predicate on prefix of clustering key" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_isAStarPattern_End" <<endl;
}
#endif //_DEBUG
// return FALSE, indicating this rule is not a good match for the current
// query
return FALSE;
}
// this set will be used to keep track of tables
// that are not part of the edges of the star
// schema
CANodeIdSet availableNodes(childSet);
// remove fact table from available nodes
availableNodes-=factTable;
// remove tables directly connected to the fact table
// via join predicates on fact table clustering key
// prefix
availableNodes-=connectedTables;
// set of tables representing the edges of the star from tables connected to the
// prefix of the fact table clustering key
// start by building an edge from the tables connected to the first prefix column
// then add to that the edge from the tables connected to the next prefix column
// and so on....
// once an edge is built it will be inserted in a list of edges (variable
// listOfEdges) The list as evident from the comments above will be ordered by
// the position of the fact table clustering key column through which the edge
// is connected.
CANodeIdSet edge;
// variable used while iterating in the loops below
CANodeIdSet currentEdge;
// Set of all tables in edges that should be joined before fact table
// i.e. rows coming out of this set of tables will be nested joined
// into fact table
// If this set is empty then the star pattern was not matched
CANodeIdSet optimalEdgeSet;
// Cardinality of the set of tables in an edge
CostScalar dataFlowFromEdge;
// Cardinality of the fact table after application of clustering key
// prefix columns after joining with the tables in the set 'edge'.
CostScalar factTableRowsToScan;
// Rows returned from the factTable
CostScalar dataFlowFromFactTable;
// cost of accessing the fact Table as outer most table
CostScalar factTableHashJoinCost;
// compute the cost of accessing the fact Table as the outer most table
factTableHashJoinCost = computeCostForFactTable(csZero,
factTableCKPrefixCardinality,
factTable,
mjoin);
// variable will be used while iterating through the loops below
// cost of nested join of fact table with tables from edge
CostScalar factTableCost;
// variable use to keep track of the lowest cost of a nested join
// on the fact table
//CostScalar lowestFactTableCost(csZero);
CostScalar lowestCombinedCost(csZero);
// This determines how much cheaper a fact table nested should be
// vs hash join / full scan of the fact table, for a plan with a
// nested join on the fact table to be considered feasible.
// Currently the default value is 10, this can be adjusted after
// further testing.
// Remember we are trying to find if a type-I plan is feasible
// If not we'll try a type-II plan (i.e. a plan with a hash
// join on the fact table)
UInt32 factTableCostFactor =
(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_3);
// list of edges, where each edge starts from a column connected
// to a key prefix of the fact table. The list is in the order
// of the columns in the key of the fact table
NAList<CANodeIdSet> * listOfEdges =
new (CmpCommon::statementHeap()) NAList<CANodeIdSet> (CmpCommon::statementHeap(),
connectedTables.entries());
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Cost estimate of fact table access as outer most table: "\
<< istring(Lng32(factTableHashJoinCost.value()))\
<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Star Join will make sense if fact table nested join access is "\
<< istring(Lng32(factTableCostFactor))\
<< " times cheaper" << endl;
}
#endif //_DEBUG
//Doing a nested join on the fact table will only make sense if accessing
//fact table as outer most table is factTableCostFactor times as expensive
//as doing a nested join
factTableHashJoinCost = factTableHashJoinCost / factTableCostFactor;
//set of connected tables that have already been used to build edges
CANodeIdSet usedConnectedTables;
UInt32 lookAhead = (ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_18);
// in the loop below we iterate over the columns of the fact table clustering
// key prefix that are covered by either local or join predicates.
// We iterate in order of the column's position in the clustering key.
// In each iteration we try to see if the column has a join pred then
// start an edge from the table connected via the column.
// This is done so that we get a list of edges in an order which would
// allow for ordered probes into the nested join on the fact table
// e.g. consider the following connectivity graph
// fact table cluster key is : a,b,c in that order
//
//
// DIM2
// |
// b
// |
// DIM1-a-FACT-c-DIM3
//
//
// Then we want to get the edges in order starting from the outer most
// i.e. DIM1, DIM2, DIM3
//
// Since we are attempting to get a plan like
//
// NJ
// / \
// / \
// NJ FACT
// / \
// / \
// NJ DIM3
// / \
// / \
// DIM1 DIM2
//iterate over the covered prefix of the clustering key
for (i = 0 ; i < (ULng32) numPrefixColsCoveredFromFactTableCK; i++)
{
// Get the covered prefix on the fact table clustering key
// covered prefix is constituted by columns that have either
// a local or a join predicate on them.
// get a handle to the key column
keyColumn = factTableCK[i];
//get a handle to the column analysis for this column
ColAnalysis * keyColAnalysis = keyColumn.colAnalysis();
// check if this column has join predicates on it
if(!keyColAnalysis->getConnectedJBBCs().entries())
{
// if this does not have a join predicate on it
// then it's covered by a local pred. Add it to
// the list of key prefix columns covered.
// It could also be because of a veg this column
// does not have a join rather a local pred e.g.
//
// select count(*)
// from
// fact, dim
// where
// fact.a = dim.a and
// dim.a = 10;
//
// In this case 'fact.a = 10' is implied via a veg
// and the join between fact and dim is essentially
// a cross product after application of local preds
coveredFactTableCKPrefix.insert(keyColumn);
continue;
}
// Get tables from this multi-join (i.e. the multi-join on which we are
// doing the top match) that are connected to the fact table via this
// column
// This will get all tables connected via this column
tablesConnectedViaThisColumn = keyColAnalysis->getConnectedJBBCs();
// This will narrow down the set of tables from all connected
// tables to connected tables in this multi-join
tablesConnectedViaThisColumn.intersectSet(connectedTables);
//tablesConnectedViaThisColumn should have atleast one entry
CCMPASSERT(tablesConnectedViaThisColumn.entries());
if(!tablesConnectedViaThisColumn.entries())
{
return FALSE;
}
// insert this column into list of key prefix columns covered
coveredFactTableCKPrefix.insert(keyColumn);
//get the number of tables connected via this columns
UInt32 numTablesConnectedViaThisColumn =
tablesConnectedViaThisColumn.entries();
// Remove tables that have already been considered in previous iterations
tablesConnectedViaThisColumn.subtractSet(usedConnectedTables);
// get connected tables that have not been considered
// i.e. we have not built an edge starting from them.
UInt32 connectedTablesNotConsidered =
tablesConnectedViaThisColumn.entries();
// there are no connected tables to consider
if(!connectedTablesNotConsidered)
continue;
// if tables connected via this column is less than the number of
// connected tables not considered
if(numTablesConnectedViaThisColumn > connectedTablesNotConsidered)
{
// This would happen in the following scenario
// consider fact table key is a, b, c
// consider column connectivity as below
//
// a b c
// | | |
// DIM1 DIM1 |
// | |
// DIM2 DIM2
//
// Notice here when we come to b, we would have already covered b if
// we started an edge from DIM1 while iterating for column 'a'. Therefore
// b is already covered and we can skip over to column c.
//
// This is a special case but I was worried about the case where all dimension
// table might joined to the fact table via a partitioning key column in addition
// to what ever other part of the fact table key they join to.
// This was apparently an MP practice to try to colocate joins in the same partitions
//
// In such a case it would be preferrable to get an ordering of DIM1,DIM2 rather
// than DIM2, DIM1 for the probes going into the fact table nested join
// This column is already covered, move to the next one
continue;
}
// the next table to consider for starting an edge
CANodeId tableToConsider = NULL_CA_ID;
if (tablesConnectedViaThisColumn.entries()==1)
{
// get the jbbc in this set
tableToConsider = tablesConnectedViaThisColumn.getFirst();
}
else{
Lng32 maxPrefix = 0;
CANodeIdSet coveredTables;
// Find the table that gives the max key coverage. Consider the example
// below:
// consider fact table key is a, b, c
// consider column connectivity as below
//
// a b c
// | | |
// DIM1 DIM1 |
// | |
// DIM2 DIM2
//
// When iterating for column 'a' we would like to start an edge
// from table DIM1 rather than DIM2, since 'a' will cover a greater
// key prefix.
for(CANodeId currentTable = tablesConnectedViaThisColumn.init();
tablesConnectedViaThisColumn.next(currentTable);
tablesConnectedViaThisColumn.advance(currentTable))
{
Lng32 coveredPrefix = 0;
// add currentTable to list of tables that we
// have already determined cover key prefix
// In each iteration:
// 1. Add the current table to this list
// 2. determine the key prefix covered
// 3. Remove current table from this list
// Eventually the table that provides the
// greatest key prefix will be considered
usedConnectedTables.insert(currentTable);
// find the key prefix covered
coveredTables = factTable.getNodeAnalysis()->
getTableAnalysis()->
getJBBCsConnectedToPrefixOfList(
usedConnectedTables,
factTableCK,
coveredPrefix,
dummyForJoinPreds,
dummyForLocalPreds);
// if covered prefix is the greater than the
// max prefix we have till now, then consider
// this table to start an edge from.
if((tableToConsider == NULL_CA_ID) ||
(maxPrefix < coveredPrefix))
{
tableToConsider = currentTable;
maxPrefix = coveredPrefix;
}
// remove currentTable from list of tables that we
// have already determined cover key prefix
usedConnectedTables.remove(currentTable);
}
}
// finally we have determined the table that provides for
// the greatest key prefix on the fact table, add it to
// the list of connected tables for which we had built edges
usedConnectedTables.insert(tableToConsider);
//build the edge
currentEdge = extendEdge(tableToConsider, availableNodes,lookAhead);
(*listOfEdges).insert(currentEdge);
//edge += currentEdge;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Built an edge starting from table "\
<< tableToConsider.getText()<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "The edge is " \
<< currentEdge.getText()<<endl;
}
#endif
}
// variable used in iteration
CANodeId connectedTable = NULL_CA_ID;
// if there are still some tables left that were on the key columns
// but were not considered earlier because we where optimizing key
// coverage
if(usedConnectedTables.entries() != connectedTables.entries())
{
// build an edge from all the tables in the unusedConnectedTables set
// since they still have predicates on fact table clustering key prefix
// columns and could provide reduction.
CANodeIdSet unusedConnectedTables(connectedTables);
unusedConnectedTables.subtractSet(usedConnectedTables);
for(connectedTable = unusedConnectedTables.init();
unusedConnectedTables.next(connectedTable);
unusedConnectedTables.advance(connectedTable))
{
//build the edge
currentEdge = extendEdge(connectedTable, availableNodes, lookAhead);
(*listOfEdges).insert(currentEdge);
//edge += currentEdge;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Built an edge starting from table "\
<< connectedTable.getText()<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "The edge is " \
<< currentEdge.getText()<<endl;
}
#endif
}
}
// We have the list of edges joined to the fact table in the order
// of the fact table clustering key columns.
// Find the optimal location for the fact table, i.e. we'll try to
// see which tables should be join below the fact table in a left
// deep join sequence.
//
// e.g. consider the following connectivity graph
// fact table cluster key is : a,b,c in that order
//
//
// DIM2
// |
// b
// |
// DIM1-a-FACT-c-DIM3
//
//
// Then the list of edges will be DIM1, DIM2, DIM3
//
// In the following loop we'll estimate the cost of
// doing a nested join on the fact table after
// 1. DIM1,
// 2. DIM1, DIM2
// 3. DIM1, DIM2, DIM3
//
// Then we compare the optimal cost of doing a nested join
// on the fact table, with the cost of just doing a hash
// join on the fact table which basically involves scanning the
// whole fact table.
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Starting to evaluate optimal location of Fact Table "<< endl;
}
#endif
//cost of fact table nested join after
//maximum possible key prefix coverage
CostScalar costForMaxKeyCoverage=0;
CostScalar costOfDimProbes(csZero);
double probeHashTableUnitCost =
(double) (ActiveSchemaDB()->getDefaults()).getAsDouble
(MULTI_JOIN_PROBE_HASH_TABLE);
// iterate over list of edges
for ( i = 0; i < (*listOfEdges).entries(); i++)
{
// get the current edge from the list of edges
currentEdge = (*listOfEdges)[i];
//the edge that was used in the last iteration
CANodeIdSet prevEdgeAndFact = edge;
prevEdgeAndFact += factTable;
// add it to the cumulative edge, i.e. list of
// all tables we are considering to be below the
// fact table
edge += currentEdge;
// get the number of rows coming out of all the tables
// that'll be below the fact tables.
// This is the number of probes going into the fact table
// for this nested join
dataFlowFromEdge = appStatMan->
getStatsForCANodeIdSet(edge)->
getResultCardinality();
if((factIsHashPartitioned) &&
(CmpCommon::getDefault(COMP_BOOL_150) == DF_OFF))
{
ValueIdSet factTableColumnsJoined = factTable.getNodeAnalysis()
->getTableAnalysis()
->getColsConnectedViaEquiJoinPreds(edge);
CollIndex numPartKeyCols = factTablePartKey.entries();
CollIndex numJoinedCols = factTableColumnsJoined.entries();
CollIndex numPartKeyColsCovered = 0;
for (ValueId VId = factTablePartKey.init();
factTablePartKey.next(VId);
factTablePartKey.advance(VId) )
{
ValueIdSet tempJoinedCols = factTableColumnsJoined;
tempJoinedCols.removeCoveredVIdSet(VId);
if(tempJoinedCols.entries() < numJoinedCols)
numPartKeyColsCovered++;
else
break;
}
if(numPartKeyColsCovered < numPartKeyCols)
{
dataFlowFromEdge = dataFlowFromEdge * factTableNumPartitions;
}
}
// figure out the number of fact table rows that'll be scanned for this
// nested join
factTableRowsToScan = appStatMan->
getStatsForJoinPredsOnCKOfJBBC(edge, factTable)->
getResultCardinality();
// figure out the rows coming out of the fact table after this nested join
// using joinJBBChildren(prevEdgeAndFact, currentEdge) instead of
// using joinJBBChildren(edge, factTable), in order to drive ASM using
// joins, 'edge' could have cross products and will miss some cardinality
// adjustments because of join with a potential cross product.
dataFlowFromFactTable = appStatMan->
joinJBBChildren(prevEdgeAndFact, currentEdge)->
getResultCardinality();
// now cost the nested join
factTableCost = computeCostForFactTable(dataFlowFromEdge,
factTableRowsToScan,
factTable,
mjoin);
// capture the cost of the nested join when all edges are join below the
// fact table.
// This will be the cost for max key coverage after the last iteration.
costForMaxKeyCoverage = factTableCost;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "FactTable after edge " \
<< currentEdge.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "The cummulative edge is "\
<< edge.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Probes into fact table: "\
<< istring(Lng32(dataFlowFromEdge.value()))\
<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Fact Table Rows to scan: "\
<< istring(Lng32(factTableRowsToScan.value()))\
<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Rows coming out of fact table: "\
<< istring(Lng32(dataFlowFromFactTable.value()))\
<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "Our cost estimate of fact table nested join: "\
<< istring(Lng32(factTableCost.value()))\
<<endl;
}
#endif //_DEBUG
// if this is the first iteration
// or current nested join cost is cheaper
// than lowest nested join cost
CostScalar combinedCost = costOfDimProbes + factTableCost;
if ((NOT lowestCombinedCost.isGreaterThanZero()) ||
(combinedCost < lowestCombinedCost))
{
lowestCombinedCost = combinedCost;
if (lowestCombinedCost <= factTableHashJoinCost)
{
optimalEdgeSet = edge;
// set the set Of tables to be joined before the fact table,
// this will be used during nextSubstitute for this rule.
mjStarJoinIRuleWA->nodesJoinedBeforeFactTable_ =
optimalEdgeSet;
// set the optimal location of the fact table in the
// list of edges, this will be used by the nextSubstitute()
// method
mjStarJoinIRuleWA->optimalFTLocation_ = (Int32) i+1;
}
}
#if 0
if ((NOT lowestFactTableCost.isGreaterThanZero()) ||
(factTableCost < lowestFactTableCost))
{
lowestFactTableCost = factTableCost;
if (lowestFactTableCost <= factTableHashJoinCost)
{
optimalEdgeSet = edge;
// set the set Of tables to be joined before the fact table,
// this will be used during nextSubstitute for this rule.
mjStarJoinIRuleWA->nodesJoinedBeforeFactTable_ =
optimalEdgeSet;
// set the optimal location of the fact table in the
// list of edges, this will be used by the nextSubstitute()
// method
mjStarJoinIRuleWA->optimalFTLocation_ = (Int32) i+1;
}
}
#endif
costOfDimProbes += dataFlowFromEdge * probeHashTableUnitCost;
}
//This variable determines if we want all edges of the star
//that join on the key prefix on the Fact Table, to be joined
//below i.e. before the fact table.
NABoolean forceMaxKeyCoverageOnFT = FALSE;
// COMP_BOOL_12 is used to force a plan with max key coverage on
// the fact table, i.e. join all edges covering key prefix column
// below the fact table to get max key coverage for nested join
// on fact table.
// COMP_BOOL_11 is similar to COMP_BOOL_12. It differs to COMP_BOOL_12
// in that it does not force a max key coverage plan if the cost of
// the nested join is higher than the cost of the hash join on the
// fact table.
// COMP_BOOL_13 basically means to force a star join rule plan.
//if we want to force max key prefix coverage on fact table
//this should only be allowed if fact table nested join is
//cheaper than doing a full scan of the fact table (i.e. the
//hash join scenario)
//If COMP_BOOL_12 is 'ON' then we'll just force a plan with
//max key prefix coverage on fact table
if ((costForMaxKeyCoverage < factTableHashJoinCost) &&
(CmpCommon::getDefault(COMP_BOOL_11) == DF_ON))
forceMaxKeyCoverageOnFT = TRUE;
//If COMP_BOOL_12 is 'ON' then we'll just force a plan with
//max key prefix coverage on fact table
if (CmpCommon::getDefault(COMP_BOOL_12) == DF_ON)
forceMaxKeyCoverageOnFT = TRUE;
//COMP_BOOL_13 means force star join rule !!!
//therefore if starjoin rule was not found feasible
//just force max key coverage i.e. force all the
//edges joining on FT key prefix columns to be below
//fact table
if (((!optimalEdgeSet.entries()) &&
(CmpCommon::getDefault(COMP_BOOL_13) == DF_ON)) &&
(CmpCommon::getDefault(COMP_BOOL_77) != DF_ON))
forceMaxKeyCoverageOnFT = TRUE;
// if we are forcing max key prefix coverage for the
// fact table nested join.
if(forceMaxKeyCoverageOnFT)
{
edge.clear();
// iterate over list of edges
for ( i = 0; i < (*listOfEdges).entries(); i++)
{
currentEdge = (*listOfEdges)[i];
edge += currentEdge;
}
optimalEdgeSet = edge;
// set the set Of tables to be joined before the fact table,
// this will be used during nextSubstitute for this rule.
mjStarJoinIRuleWA->nodesJoinedBeforeFactTable_ =
optimalEdgeSet;
mjStarJoinIRuleWA->optimalFTLocation_ = (Int32) (*listOfEdges).entries();
}
// from the set of tables in all the edges that join to the key prefix
// take out the set of tables that compose the fringes that should be joined
// before the fact table
edge.subtractSet(optimalEdgeSet);
// add the remaining tables to the available nodes set
// these tables will be joined above/after the fact table
// in the left deep join sequence
availableNodes.addSet(edge);
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "The tables below fact table"\
<< optimalEdgeSet.getText()<<endl;
CURRCONTEXT_OPTDEBUG->stream() << "The tables above fact table "\
<< availableNodes.getText()<<endl;
}
#endif //_DEBUG
// nodes to be joined after Fact Table
mjStarJoinIRuleWA->availableNodes_ = availableNodes;
// save list of edges in the work area it'll be used when by nextSubstitute()
mjStarJoinIRuleWA->listOfEdges_ = listOfEdges;
// if optimalEdgeSet is empty it could mean
// 1. there were not join predicates on fact table clustering key prefix
// 2. there were join predicates on fact table clustering key prefix but
// nested join on fact table clustering is just too expensive.
// In either case, a type-I nested join plan is not a good idea
if (!optimalEdgeSet.entries())
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Pattern Not Matched, there is no significant reduction on fact table" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_isAStarPattern_End" <<endl;
}
#endif //_DEBUG
// return FALSE, indicating this rule is not a good match for the current
// query
return FALSE;
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Pattern Matched" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIRule_isAStarPattern_End" <<endl;
}
#endif //_DEBUG
return TRUE;
}
void MJStarJoinIRule::sortMJJBBCsByCardAfterLocalKeyPrefixPred(NAList<CANodeId> &sortedJBBCs,const MultiJoin * const mjoin)
{
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = mjoin->getJBBSubset().getJBBCs();
//The following loop performs the sorting of the JBBCs in
//this multi-join and places the sorted list in sortedJBBCs
for(Int32 i = 0; i < mjoin->getArity(); i++){
//To start with, pick an arbitrary child as largest
CANodeId largestChild = childSet.init();
// cast to void to prevent Coverity CHECKED_RETURN error
(void)childSet.next(largestChild);
//get cardinality
EstLogPropSharedPtr estLogProps = appStatMan->getStatsForLocalPredsOnCKPOfJBBC(largestChild);
CostScalar largestCardinality = estLogProps->getResultCardinality();
for( CANodeId child = largestChild; childSet.next(child); childSet.advance(child))
{
//get cardinality of child i.e. the size of the child
estLogProps = appStatMan->getStatsForLocalPredsOnCKPOfJBBC(child);
CostScalar childCardinality = estLogProps->getResultCardinality();
//if this child's cardinality is larger than the largest child's
//cardinality, set this child to be the largest
if( childCardinality >= largestCardinality)
{
largestChild = child;
largestCardinality=childCardinality;
}
}
//remove the largest child from the child set
childSet.remove(largestChild);
//add largest child to sorted list
sortedJBBCs.insert(largestChild);
}
};//MJStarJoinIRule::sortMJJBBCsByCardAfterLocalKeyPrefixPred
CANodeId MJStarJoinIRule::findFactTable(const MultiJoin * const mjoin,
CostScalar & factTableCKPrefixCardinality,
CANodeId & biggestTable)
{
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = mjoin->getJBBSubset().getJBBCs();
QueryAnalysis* qa = QueryAnalysis::Instance();
JBBSubsetAnalysis* jbbSubsetAnalysis =
qa->findJBBSubsetAnalysis (mjoin->getJBBSubset());
CANodeId factTable = jbbSubsetAnalysis->findFactTable
( childSet, factTableCKPrefixCardinality, biggestTable );
return factTable;
};//MJStarJoinIRule::findFactTable
//extends an edge of the star
CANodeIdSet MJStarJoinRules::extendEdge(CANodeId thisTable,//in
CANodeIdSet& availableNodes,
UInt32 lookAhead)//in\out
{
// CANodeIdSet representing thisTable
CANodeIdSet thisTableSet(thisTable);
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
// return value
CANodeIdSet edge;
//remove this table from the availableNodes
//if it there
if(availableNodes.contains(thisTable))
availableNodes.remove(thisTable);
//extend the edge to include thisTable
edge += thisTable;
if(!availableNodes.entries())
return edge;
if(lookAhead)
lookAhead -=1;
//if(!lookAhead)
// return edge;
// Tables connected to thisTable that are still available
CANodeIdSet connectedTables;
// get all connected tables that are still available
// i.e. not already part of some edge.
connectedTables = thisTable.getNodeAnalysis()
->getJBBC()->getJoinedJBBCs();
connectedTables.intersectSet(availableNodes);
CANodeIdSet connectedTablesBackup = connectedTables;
UInt32 numConnectedTables = connectedTables.entries();
// get after local predicate cardinality of thisTable
CostScalar thisTableCardinality = appStatMan->
getStatsForCANodeId(thisTable)->
getResultCardinality();
//temp var to capture the set representing edge extension
CANodeIdSet edgeExtension;
//temp var to get cardinality of edge as it is being built
CostScalar currentEdgeCardinality;
//variables used for iteration
UInt32 i;
CANodeId connectedTable;
CANodeIdSet connectedTableSet;
const CostScalar EXTENSION_ALLOWANCE(1.05); // We allow 5% increase
// sort connected Tables by cardinality of join with thisTable
for (i = 0;i < numConnectedTables; i++)
{
CostScalar lowestCardinality = 0;
// A table from the set of connectedTables that produces the lowest
// cardinality when joined to thisTable
CANodeId smallestRemainingTable = NULL_CA_ID;
for(connectedTable = connectedTables.init();
connectedTables.next(connectedTable);
connectedTables.advance(connectedTable))
{
//get a CANodeIdSet only containing the connectedTable to estimate
//join of connectedTable with thisTable.
connectedTableSet = connectedTable;
//get cardinality for join of thisTable and connectedTable
CostScalar joinCardinality =
appStatMan->joinJBBChildren(thisTableSet, connectedTableSet)
->getResultCardinality();
if(smallestRemainingTable == NULL_CA_ID)
{
lowestCardinality = joinCardinality;
smallestRemainingTable = connectedTable;
continue;
}
if(joinCardinality < lowestCardinality)
{
lowestCardinality = joinCardinality;
smallestRemainingTable = connectedTable;
}
}
//add the table that produces the lowest cardinality
//after joining with thisTable
//connectedTablesSorted.insert(smallestRemainingTable);
if(smallestRemainingTable == NULL_CA_ID)
continue;
JBBC * smallestRemainingTableJBBC =
smallestRemainingTable.getNodeAnalysis()->getJBBC();
JBBSubset * thisTableJBBSubset =
thisTableSet.jbbcsToJBBSubset();
NABoolean isGuaranteedNonExpandingJoin =
thisTableJBBSubset->
isGuaranteedNonExpandingJoin(*smallestRemainingTableJBBC);
// if the smallestRemaingTable
if((lowestCardinality <= (EXTENSION_ALLOWANCE * thisTableCardinality))&&
(isGuaranteedNonExpandingJoin ||
lookAhead ||
smallestRemainingTableJBBC->isOneRowMax()))
{
//sortedListOfConnectedReducingTables.insert(connectedTable);
//only extend to a table that is still available
if(availableNodes.contains(smallestRemainingTable))
{
edgeExtension = extendEdge(smallestRemainingTable, availableNodes, lookAhead);
// get cardinality for join of edge and edgeExtension
// I don't really need to do this, but doing this to
// better drive the ASM cache
currentEdgeCardinality =
appStatMan->joinJBBChildren(edge, edgeExtension)
->getResultCardinality();
edge += edgeExtension;
}
}
//remove it from connectedTables
connectedTables.remove(smallestRemainingTable);
}
return edge;
}
//MJStarJoinRules::extendEdge
// get a rough estimate of cost for doing a nested join on the fact table
// number of probes = dataFlowFromEdge
// Rows of fact table that will be scanned = factTableRowsToScan
CostScalar MJStarJoinIRule::computeCostForFactTable(CostScalar probes,
CostScalar factTableRowsToScan,
CANodeId factTable,
MultiJoin * mjoin)
{
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
CostScalar costPerProbe ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_0));
CostScalar costPerUnitSize ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_1));
CostScalar costPerRowReturned ((ActiveSchemaDB()->getDefaults()).getAsDouble(COMP_FLOAT_2));
//Record Size of the factTable
CostScalar factTableRecordSize = factTable.getNodeAnalysis()->
getTableAnalysis()->
getTableDesc()->
getNATable()->
getRecordLength();
CostScalar subsetSizeKB = (factTableRowsToScan * factTableRecordSize)/1024;
CostScalar factTableRowsReturned = appStatMan->
getStatsForCANodeId(factTable)->
getResultCardinality();
ExprGroupId factTableExprGroupId = mjoin->getChildFromJBBCId(factTable);
CostScalar sizeOfRowReturnedFromFactTable =
factTableExprGroupId.getGroupAttr()->getRecordLength();
CostScalar costForFactTable = (costPerProbe * probes) +
(costPerUnitSize * subsetSizeKB) +
(costPerRowReturned *
factTableRowsReturned *
sizeOfRowReturnedFromFactTable);
return costForFactTable;
}
// sort list of JBBCs in descending order i.e.
// * first element is largest
// * last element is smallest
// the after local predicate cardinality of a JBBC is used for ordering
void MJStarJoinIRule::sortMJJBBCs(NAList<CANodeId> &sortedJBBCs,const MultiJoin * const mjoin)
{
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
//get CANodeIdSet representing this MultiJoin
CANodeIdSet childSet = mjoin->getJBBSubset().getJBBCs();
//The following loop performs the sorting of the JBBCs in
//this multi-join and places the sorted list in sortedJBBCs
for(Int32 i = 0; i < mjoin->getArity(); i++){
//To start with, pick an arbitrary child as largest
CANodeId largestChild = childSet.init();
// cast to void to prevent Coverity CHECKED_RETURN error
(void)childSet.next(largestChild);
//get cardinality
EstLogPropSharedPtr estLogProps = appStatMan->getStatsForCANodeId(largestChild);
CostScalar largestCardinality = estLogProps->getResultCardinality();
for( CANodeId child = largestChild; childSet.next(child); childSet.advance(child))
{
//get cardinality of child i.e. the size of the child
estLogProps = appStatMan->getStatsForCANodeId(child);
CostScalar childCardinality = estLogProps->getResultCardinality();
//if this child's cardinality is larger than the largest child's
//cardinality, set this child to be the largest
if( childCardinality >= largestCardinality)
{
largestChild = child;
largestCardinality=childCardinality;
}
}
//remove the largest child from the child set
childSet.remove(largestChild);
//add largest child to sorted list
sortedJBBCs.insert(largestChild);
}
};//MJStarJoinIRule::sortMJJBBCs
// -----------------------------------------------------------------------
// methods for MJStarJoinIIRule
// -----------------------------------------------------------------------
NABoolean MJStarJoinIIRule::topMatch (RelExpr * expr,
Context * context)
{
// COMP_BOOL_2 determines if LargeScope rules are tried
if (CmpCommon::getDefault(COMP_BOOL_2) == DF_ON)
return FALSE;
// MJ rules should only fire on a MultiJoin
if (NOT expr->getOperator().match(REL_MULTI_JOIN))
return FALSE;
// Get a handle to the MultiJoin on which we are performing the topMatch()
MultiJoin* mjoin = (MultiJoin*) expr;
// The StarJoin Type-II rule fires if it has been scheduled
// The rule schedule StarJoin Type-I on any MultiJoins present
// in the tree it produces
if(!mjoin->scheduledLSRs().contains(MJStarJoinIIRuleNumber))
return FALSE;
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIIRule_TopMatch_Begin" <<endl;
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIIRule TopMatch on " << mjoin->getJBBSubset().getText() << endl;
CURRCONTEXT_OPTDEBUG->stream() << "Superset is : " << mjoin->getJBBSubset().getJBB()->getMainJBBSubset().getText() << endl;
}
#endif //_DEBUG
// get a handle to the JBBSubset Analysis for the JBBSubset
// represented by this MultiJoin (i.e. mjoin)
JBBSubsetAnalysis * mjoinAnalysis = mjoin->
getJBBSubset().
getJBBSubsetAnalysis();
mjoinAnalysis->analyzeInitialPlan();
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIIRule_TopMatch_End" <<endl;
}
#endif //_DEBUG
return TRUE;
}
RelExpr * MJStarJoinIIRule::nextSubstitute(RelExpr * before,
Context * context,
RuleSubstituteMemory * &memory)
{
if (CmpCommon::getDefault(COMP_BOOL_115) == DF_ON)
return nextSubstitute_Old(before, context, memory);
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIIRule_nextSubstitute_Begin" <<endl;
}
#endif //_DEBUG
MultiJoin * mjoin = (MultiJoin *) before;
// Make sure GroupAnalysis localJBBView is the same as mjoin's JBBSubset.
CMPASSERT(mjoin->getJBBSubset() ==
*(mjoin->getGroupAttr()->getGroupAnalysis()->getLocalJBBView()));
// get a handle to the JBBSubset Analysis for the JBBSubset
// represented by this MultiJoin (i.e. mjoin)
JBBSubsetAnalysis * mjoinAnalysis = mjoin->
getJBBSubset().
getJBBSubsetAnalysis();
// The list CANodeIdSets representing the join
// sequence produced by this rule
// Element 0: is inner most / top most element in left deep join sequence
// Element N: is left most/outer most element in left deep join sequence
const NAList<CANodeIdSet> * const leftDeepJoinSequence =
mjoinAnalysis->getInitialPlanLeftDeepJoinSequence();
UInt32 numEntriesInLeftDeepJoinSequence = (*leftDeepJoinSequence).entries();
// setup the join directives for the left deep join sequence
NAList<MJJoinDirective *> joinDirectives(CmpCommon::statementHeap(),
numEntriesInLeftDeepJoinSequence-1);
for(UInt32 i=0;
i < (numEntriesInLeftDeepJoinSequence-1);
i++)
{
MJJoinDirective * joinDirective =
new (CmpCommon::statementHeap())
MJJoinDirective(CmpCommon::statementHeap());
CANodeIdSet joinRightChild = (*leftDeepJoinSequence)[i];
JBBC * joinRightChildJBBC = NULL;
if(joinRightChild.entries() == 1)
joinRightChildJBBC = joinRightChild.getFirst().getNodeAnalysis()->getJBBC();
// setup unconditional join directives
joinDirective->setSkipJoinLeftShift();
joinDirective->setJoinSource(Join::STAR_FILTER_JOIN);
// Skip join commutativity if COMP_BOOL_3 is ON
if (CmpCommon::getDefault(COMP_BOOL_3) == DF_ON)
{
joinDirective->setSkipJoinCommutativity();
}
// COMP_BOOL_85 is OFF my default therefore
if ((CmpCommon::getDefault(COMP_BOOL_85) == DF_OFF) &&
joinRightChildJBBC &&
!(joinRightChildJBBC->getOriginalParentJoin() &&
joinRightChildJBBC->getOriginalParentJoin()->isRoutineJoin()))
joinDirective->setSkipNestedJoin();
// COMP_BOOL_7 is OFF my default therefore
// this is essentially unconditional
if (CmpCommon::getDefault(COMP_BOOL_7) == DF_OFF)
joinDirective->setJoinFromPTRule();
if(joinRightChild.entries() > 1)
{
joinDirective->scheduleLSROnRightChild(MJStarJoinIRuleNumber);
}
// any special handling of the last join (i.e. last from the top)
if(i == numEntriesInLeftDeepJoinSequence - 2)
{
CANodeIdSet joinLeftChild = (*leftDeepJoinSequence)[i+1];
if(joinLeftChild.entries() > 1)
{
joinDirective->scheduleLSROnLeftChild(MJStarJoinIRuleNumber);
}
}
joinDirectives.insert(joinDirective);
}
return mjoin->createLeftLinearJoinTree(leftDeepJoinSequence,
&joinDirectives);
}
// Below is the old nextSubstitute. The code is not exercised any more
RelExpr * MJStarJoinIIRule::nextSubstitute_Old(RelExpr * before,
Context * context,
RuleSubstituteMemory * &memory)
{
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "MJStarJoinIIRule_nextSubstitute_Begin" <<endl;
}
#endif //_DEBUG
MultiJoin * mjoin = (MultiJoin *) before;
// Make sure GroupAnalysis localJBBView is the same as mjoin's JBBSubset.
CMPASSERT(mjoin->getJBBSubset() ==
*(mjoin->getGroupAttr()->getGroupAnalysis()->getLocalJBBView()));
//Get a handle to ASM
AppliedStatMan * appStatMan = QueryAnalysis::ASM();
RelExpr * result = NULL;
MJRulesWA * mjRulesWA = mjoin->
getJBBSubset().
getJBBSubsetAnalysis()->
getMJRulesWA();
// get a handle to the JBBSubset Analysis for the JBBSubset
// represented by this MultiJoin (i.e. mjoin)
JBBSubsetAnalysis * mjoinAnalysis = mjoin->
getJBBSubset().
getJBBSubsetAnalysis();
CANodeId factTable = mjRulesWA->factTable_;
if(factTable == NULL_CA_ID)
factTable = mjoinAnalysis->getLargestIndependentNode();
CANodeIdSet factTableSet;
factTableSet.insert(factTable);
// Get list of JBBCs in this Multi Join, the list should be sorted by the
// after local pred cardinality of the JBBC
NAList<CANodeId> * sortedJBBCs = mjoinAnalysis->
getNodesSortedByLocalPredsCard();
// compute list of fringes
//for tracking, which of the JBBCs in the multijoin are still available
CANodeIdSet availableNodes = mjoin->getJBBSubset().getJBBCs();
availableNodes.remove(factTable);
// get tables directly connected to Fact Table
CANodeIdSet connectedTables = factTable.getNodeAnalysis()->getJBBC()
->getJoinedJBBCs();
connectedTables += factTable.getNodeAnalysis()->getJBBC()
->getJBBCsRequiringInputFromMe();
// remove connected tables from availableNodes
// this is because we want to make sure only
// one directly connected table per fringer
availableNodes -= connectedTables;
//List of fringes
NAList<CANodeIdSet> listOfFringes(CmpCommon::statementHeap(),30);
CANodeIdSet currentEdge;
UInt32 lookAhead = (ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_18);
for(UInt32 j=(*sortedJBBCs).entries(); j > 0; j--)
{
CANodeId jbbc = (*sortedJBBCs)[(j-1)];
if (connectedTables.contains(jbbc))
{
NABoolean isLegalAddition = factTableSet.legalAddition(jbbc);
if(!isLegalAddition)
{
availableNodes += jbbc;
continue;
}
Join * jbbcParentJoin =
jbbc.getNodeAnalysis()->getJBBC()->getOriginalParentJoin();
currentEdge = jbbc;
CANodeIdSet attachedNodes;
//build the fringe
if((!jbbcParentJoin) ||
(jbbcParentJoin->isInnerNonSemiJoin()))
{
currentEdge = extendEdge(jbbc, availableNodes,lookAhead);
// attachedNodes are nodes connected
// to jbbc by join on their key
attachedNodes =
jbbc.getNodeAnalysis()->
getJBBC()->
getJBBCsConnectedViaKeyJoins();
attachedNodes.intersectSet(availableNodes);
}
if (CmpCommon::getDefault(COMP_BOOL_49) == DF_OFF)
{
currentEdge += attachedNodes;
availableNodes -= attachedNodes;
}
listOfFringes.insert(currentEdge);
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Built a fringe starting from table "\
<< jbbc.getText()<< endl;
CURRCONTEXT_OPTDEBUG->stream() << "The fringe is " \
<< currentEdge.getText()<<endl;
}
#endif
}
else{
continue;
}
}
#ifdef _DEBUG
if ( CmpCommon::getDefault( NSK_DBG ) == DF_ON &&
CmpCommon::getDefault( NSK_DBG_MJRULES_TRACKING ) == DF_ON )
{
CURRCONTEXT_OPTDEBUG->stream() << "Following tables are not part of any fringe, " \
<< "put each of them as a fringe by itself: \n" \
<< availableNodes.getText()<< endl;
}
#endif
// create fringes out of each one of the remaining tables
// i.e. tables that have not been included as part of any
// fringe
for (CANodeId availableNode = availableNodes.init();
availableNodes.next(availableNode);
availableNodes.advance(availableNode))
{
listOfFringes.insert(availableNode);
}
// compute join order
NAList<CANodeIdSet> orderedListOfFringes(STMTHEAP);
orderedListOfFringes.insert(factTableSet);
while (listOfFringes.entries())
{
// fringe that produces lowest cardinality when joined to the factTableSet
CANodeIdSet smallestFringe;
CostScalar lowestCardinality = 0;
for (UInt32 i = 0;
i < listOfFringes.entries();
i ++)
{
CANodeIdSet currentFringe = listOfFringes[i];
if(currentFringe.entries() == 1)
{
CANodeId fringeNode =
currentFringe.getFirst();
JBBC * fringeNodeJBBC =
fringeNode.getNodeAnalysis()->getJBBC();
CANodeIdSet jbbcsFringeNodeDependsOn =
fringeNodeJBBC->getPredecessorJBBCs();
if(!factTableSet.contains(jbbcsFringeNodeDependsOn))
continue;
}
// get cardinality for joining factTableSet to currentFringe
CostScalar joinCardinality =
appStatMan->
joinJBBChildren(factTableSet, currentFringe)->
getResultCardinality();
if((!smallestFringe.entries()) ||
(joinCardinality < lowestCardinality))
{
lowestCardinality = joinCardinality;
smallestFringe = currentFringe;
}
}
listOfFringes.remove(smallestFringe);
orderedListOfFringes.insert(smallestFringe);
factTableSet.insert(smallestFringe);
}
// join order is not set, the left deep join order going
// from the outer most to the inner most is:
// Fact Table, orderedListOfFringes[0], orderedListOfFringest[1] ....., orderedListOfFringes[n]
// construct the left deep join sequence going from the inner most -> outer most
//
// The list CANodeIdSets representing the join
// sequence produced by this rule
// Element 0: is inner most / top most element in left deep join sequence
// Element N: is left most/outer most element in left deep join sequence
NAList<CANodeIdSet> leftDeepJoinSequence (CmpCommon::statementHeap(),
mjoin->getJBBSubset().getJBBCs().entries());
for(UInt32 k = orderedListOfFringes.entries(); k > 0; k--)
{
leftDeepJoinSequence.insert(orderedListOfFringes[(k-1)]);
}
// Consolidate the fringes
mjoinAnalysis->consolidateFringes(factTable, leftDeepJoinSequence);
// end - construct left deep join sequence
// setup the join directives for the left deep join sequence
NAList<MJJoinDirective *> joinDirectives(CmpCommon::statementHeap(),orderedListOfFringes.entries()-1);
UInt32 numEntriesInLeftDeepJoinSequence = leftDeepJoinSequence.entries();
for(UInt32 i=0;
i < (numEntriesInLeftDeepJoinSequence-1);
i++)
{
MJJoinDirective * joinDirective = new MJJoinDirective(CmpCommon::statementHeap());
CANodeIdSet joinRightChild = leftDeepJoinSequence[i];
JBBC * joinRightChildJBBC = NULL;
if(joinRightChild.entries() == 1)
joinRightChildJBBC = joinRightChild.getFirst().getNodeAnalysis()->getJBBC();
// setup unconditional join directives
joinDirective->setSkipJoinLeftShift();
joinDirective->setJoinSource(Join::STAR_FILTER_JOIN);
// Skip join commutativity if COMP_BOOL_3 is ON
if (CmpCommon::getDefault(COMP_BOOL_3) == DF_ON)
{
joinDirective->setSkipJoinCommutativity();
}
// COMP_BOOL_85 is OFF my default therefore
if ((CmpCommon::getDefault(COMP_BOOL_85) == DF_OFF) &&
joinRightChildJBBC &&
!(joinRightChildJBBC->getOriginalParentJoin() &&
joinRightChildJBBC->getOriginalParentJoin()->isRoutineJoin()))
joinDirective->setSkipNestedJoin();
// COMP_BOOL_7 is OFF my default therefore
// this is essentially unconditional
if (CmpCommon::getDefault(COMP_BOOL_7) == DF_OFF)
joinDirective->setJoinFromPTRule();
if(joinRightChild.entries() > 1)
{
joinDirective->scheduleLSROnRightChild(MJStarJoinIRuleNumber);
}
// any special handling of the last join (i.e. last from the top)
if(i == numEntriesInLeftDeepJoinSequence - 2)
{
CANodeIdSet joinLeftChild = leftDeepJoinSequence[i+1];
if(joinLeftChild.entries() > 1)
{
joinDirective->scheduleLSROnLeftChild(MJStarJoinIRuleNumber);
}
}
joinDirectives.insert(joinDirective);
}
return mjoin->createLeftLinearJoinTree(&leftDeepJoinSequence,
&joinDirectives);
}
// This class was used by old code, that is not exercised anymore
MJRulesWA::MJRulesWA(JBBSubsetAnalysis * analysis)
{
CMPASSERT(analysis);
analysis_ = analysis;
centerTableComputed_ = FALSE;
centerTable_ = NULL_CA_ID;
centerTableRowsScanned_ = -1;
centerTableDataScanned_ = -1;
centerTableDataPerPartition_ = -1;
centerTablePartitions_ = -1;
centerTableConnectivity_ = 0,
maxDimensionConnectivity_ = 0,
factTable_ = NULL_CA_ID;
largestTable_ = NULL_CA_ID;
largestNode_ = NULL_CA_ID;
}