blob: 0be2941365c7fcee0935bec5234ae58a333bcd3f [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: Rule.C
* Description: Cascades optimization rules
*
* Created: 9/14/94
* Language: C++
*
*
*
*
******************************************************************************
*/
// -----------------------------------------------------------------------
#include "Sqlcomp.h"
#include "TransRule.h"
#include "ImplRule.h"
#include "opt.h"
#include "EstLogProp.h"
#include "DefaultConstants.h"
#include "CmpContext.h"
// -----------------------------------------------------------------------
// Reinitialize the rule set *in place*: to be used only if an exception is
// caught in opt.cpp -- the exception (assertion) needs to unbind all rules
// (context heap) from the memo (statement heap, about to be deleted).
// -----------------------------------------------------------------------
void ReinitRuleSet(RuleSet* rules)
{
if (rules)
{
Lng32 ruleCnt = rules->getCountOfRules();
while (ruleCnt--)
{
Rule *rule = rules->rule(ruleCnt);
if (rule->getSubstitute())
rule->getSubstitute()->releaseBindingTree(TRUE/*moribundMemo*/);
}
}
}
/* ============================================================ */
/*
Rules
=====
both transformation and implementation rules -- By default, a rule
is a simple rule, meaning that the substitute given in the rule
sufficiently represents the transformed expression after cut
operators in the rule have been bound to proper children.
*/
Rule::Rule (const char * name, RelExpr * pattern, RelExpr * substitute)
{
if (name == NULL)
// all current rules have name, which is a good practice
name_ = "no user name";
else
name_ = name;
pattern_ = pattern;
substitute_ = substitute;
prepare();
// a pattern can't have a tree node as its top node
if (pattern_)
CMPASSERT(NOT pattern_->isSubtreeOp());
} // Rule::Rule
// Rules are created once and never deleted
Rule::~Rule ()
{
delete pattern_;
delete substitute_;
} // Rule::~Rule
// print methods are for debugging
void Rule::print (FILE * f, const char * prefix, const char * suffix)
{
fprintf (f, "%sRule \"%s\" :\n",
prefix, name_);
pattern_ -> print (f, " Pattern = ", "");
substitute_ -> print (f, " Substitute = ", "");
fprintf (f, "%s\n", suffix);
} // Rule::print
NABoolean Rule::isImplementationRule() const
{
// by default, assume that a copy of the substitute is returned
return (substitute_->isPhysical());
}
NABoolean Rule::isTransformationRule() const
{
// by default, assume that a rule is not both transformation and
// implementation rule
return NOT isImplementationRule();
}
NABoolean Rule::isEnforcerRule() const
{
// For enforcer rules, the pattern is always a cutOp meaning that
// it is always applicable. Enforcer rules are useful for generating
// physical operators which enforces desired physical properties.
return (pattern_ && pattern_->isCutOp());
}
NABoolean Rule::isContextSensitive() const
{
// By default, consider implementation rules (which include
// enforcer rules) to be context-sensitive.
return isImplementationRule();
}
NABoolean Rule::isPassSensitive() const
{
// By default, assume the rule is not pass sensitive
return FALSE;
}
NABoolean Rule::isPatternSensitive() const
{
// By default, assume the rule is not pattern sensitive
return FALSE;
}
NABoolean Rule::canBePruned(RelExpr * expr) const
{
// By default, any rule can be pruned unless otherwise defined
// in its own implementation of the function.
// This function is only called after a successfull topMatch()
// i.e. within the processing of an ApplyRuleTask.
// The purpose of this function is to protect some rule-expr
// pairs from heuristic or randem pruning.
return TRUE;
}
NABoolean Rule::topMatch(RelExpr * relExpr,
Context *)
{
// default implementation should not be invoked for expression-
// insensitive rules (like enforcer rules).
CMPASSERT(relExpr != NULL);
return relExpr->patternMatch(*pattern_);
}
RelExpr * Rule::nextSubstitute (RelExpr * before,
Context *,
RuleSubstituteMemory *& /*memory*/)
{
RelExpr *result;
// the default implementation of a rule fires only once, therefore
// no RuleSubstituteMemory is allocated
// build the result expression from the rule's pattern, the skeleton
// of the rule result, and the original expression "before".
result = substitute_->copyTree(CmpCommon::statementHeap());
// now set the group attributes of the result's top node
result->setGroupAttr(before->getGroupAttr());
// now set the isinBlockStmt flag for all nodes in the tree.
result->setBlockStmtRecursively(before->isinBlockStmt());
return result;
} // Rule::nextSubstitute
Guidance * Rule::guidanceForExploringSubstitute (Guidance *)
{
return ( NULL );
} // Rule::guidanceForExploringSubstitute
Guidance * Rule::guidanceForOptimizingSubstitute (Guidance *,
Context *)
{
return ( NULL );
} // Rule::guidanceForOptimizingSubstitute
Guidance * Rule::guidanceForExploringChild(Guidance *,
Context *,
Lng32)
{
return ( NULL );
} // Rule::guidanceForExploringChild
Guidance * Rule::guidanceForOptimizingChild(Guidance *,
Context *,
Lng32)
{
return ( NULL );
} // Rule::guidanceForOptimizingChild
Int32 Rule::promiseForExploration (RelExpr * expr,
RelExpr *,
Guidance *)
{
// by default, explore in exhaustive search
// (only transformation rules are used for exploration)
/*if (expr && CURRSTMT_OPTDEFAULTS->pruneByOptDefaults(this, expr))
return 0;*/
return ( DefaultTransRulePromise );
} // Rule::promiseForExploration
Int32 Rule::promiseForOptimization (RelExpr * expr,
Guidance *,
Context *)
{
/*if (expr && CURRSTMT_OPTDEFAULTS->pruneByOptDefaults(this, expr))
return 0;*/
if (isImplementationRule())
{
return DefaultImplRulePromise;
}
else
{
return DefaultTransRulePromise;
}
} // Rule::promiseForOptimization
void Rule::prepare()
{
// an array to hold pointers for cuts of the pattern, ordered by index
// (most rules should get by with 4 cuts and 4 wildcards)
ARRAY(CutOp *) cuts(HEAP, 4);
ARRAY(WildCardOp *) wildcards(HEAP, 4);
// fill the "cuts" array
if (pattern_)
(void *) checkAndBindPattern(pattern_,cuts,wildcards,FALSE);
// now replace all cut nodes in the substitute by the corresponding
// cut nodes of the pattern
if (substitute_)
substitute_ = checkAndBindPattern(substitute_,cuts,wildcards,TRUE);
} // Rule::prepare
RelExpr * Rule::checkAndBindPattern(RelExpr * patt,
ARRAY(CutOp *) & cuts,
ARRAY(WildCardOp *) & wildcards,
NABoolean isSubstitute)
{
RelExpr *result = patt;
if (patt->isCutOp())
{
CutOp *acut = (CutOp *) patt;
Int32 ix = acut->getIndex();
if (!isSubstitute)
{
if (cuts.used(ix))
// error, a cut index occurs twice in the pattern
ABORT("encountered pattern with duplicate cut nodes");
else
cuts.insertAt(ix,acut);
}
else
{
// this is a substitute, replace the cut node with the
// corresponding cut node from the pattern (both pattern and
// substitute then point to the same cut node)
if (NOT cuts.used(ix))
ABORT("cut in substitute doesn't match the pattern");
result = cuts[ix];
delete patt;
patt = result;
}
} // pattern is a cut
else
if (patt->isWildcard())
{
WildCardOp *wild = (WildCardOp *) patt;
Int32 designator = wild->getDesignator();
if (!isSubstitute)
{
// fill in the appropriate entry of the wildcards array
if (wildcards.used(designator))
// error, a wildcard index occurs twice in the pattern
ABORT("pattern with duplicate wildcard designators");
else
wildcards.insertAt(designator,wild);
}
else
{
if (NOT wildcards.used(designator))
ABORT("wildcard in substitute has an invalid designator");
// this is a substitute, associate the wildcard node in the
// substitute with the wildcard node in the pattern
wild->setCorrespondingNode(wildcards[designator]);
// unlike cut nodes, wildcards are NOT shared
// between the pattern and the substitute
}
} // pattern is a wildcard
// recurse
Int32 arity = patt->getArity();
for (Lng32 i = 0; i < arity; i++)
{
result->child(i) = checkAndBindPattern(patt->child(i)->castToRelExpr(),
cuts,
wildcards,
isSubstitute);
}
return result;
} // Rule::checkAndBindPattern
// Can this rule potentially generate the specified pattern?
// NOTE: This method must be defined for those rules that specifies
// multiple substitutes, or those rules for which the substitute is
// not a true indication of the substitute expression.
NABoolean Rule::canMatchPattern (const RelExpr * pattern) const
{
return substitute_->getOperator().match(pattern->getOperator());
}
// -----------------------------------------------------------------------
// methods for class RuleSubset
// -----------------------------------------------------------------------
RuleSubset::RuleSubset(CollHeap* h) :
SUBARRAY(Rule *)(&GlobalRuleSet->allRules_,h) {}
RuleSubset::RuleSubset (const RuleSubset & orig, CollHeap * h) :
SUBARRAY(Rule *)(orig,h) {}
// -----------------------------------------------------------------------
// methods for class RuleSet
// -----------------------------------------------------------------------
RuleSet::RuleSet(Int32 approxNumRules, CollHeap* h) :
allRules_(h, approxNumRules),
passNRules_(h),
oldRules_(&allRules_,h),
transRules_(&allRules_,h),
implRules_(&allRules_,h),
enforcerRules_(&allRules_,h),
contextSensitiveRules_(&allRules_,h),
passSensitiveRules_(&allRules_,h),
patternSensitiveRules_(&allRules_,h),
ruleApplCount_(0)
{
initializeCurrentPassNumber();
setTotalPasses(); // set the total number of optimization passes
initializeAllPasses();
}
// Rules are created once and never deleted
RuleSet::~RuleSet()
{
for (Lng32 i = 0; i < (Lng32)allRules_.entries(); i++)
delete allRules_[i];
for (Lng32 i = 0; i < (Lng32)passNRules_.entries(); i++)
delete passNRules_[i];
}
void RuleSet::insert(Rule * r)
{
Lng32 num = r->ruleNumber_ = allRules_.entries();
allRules_.insertAt(num,r);
if (num >= MAX_RULE_COUNT)
ABORT("Increase max number of rules in Rule.h");
if (r->isImplementationRule())
implRules_ += num;
if (r->isTransformationRule())
transRules_ += num;
if (r->isEnforcerRule())
enforcerRules_ += num;
if (r->isContextSensitive())
contextSensitiveRules_ += num;
if (r->isPassSensitive())
passSensitiveRules_ += num;
if (r->isPatternSensitive())
patternSensitiveRules_ += num;
}
void RuleSet::setTotalPasses()
{
NAString value(CmpCommon::statementHeap());
// Only do the first pass if the optimization level is MINIMUM or
// if the query complexity is greater than that for 32 regular joins
// (2^32 = INT_MAX), and the join order is not fixed by the user.
// This is because we assume that performing more than 32 joins will
// consume too many resources and take too long, even at MEDIUM opt.
// level.
// limit increase to 40 way join i.e. (40 * 2 ^ 39)
if (!CURRENTSTMT &&
(CmpCommon::getDefault(OPTIMIZATION_LEVEL) == DF_MINIMUM))
{
totalPasses_ = 1;
}
else if (!CURRENTSTMT)
{
totalPasses_ = MAX_NUM_OF_PASSES;
}
else if ((CURRSTMT_OPTDEFAULTS->optLevel() == OptDefaults::MINIMUM) OR
((CURRSTMT_OPTDEFAULTS->getQueryComplexity() > 3e13) AND
NOT CURRSTMT_OPTDEFAULTS->joinOrderByUser()))
{
totalPasses_ = 1;
}
else
{
totalPasses_ = MAX_NUM_OF_PASSES;
}
//---------------------------------------------------------
// Optimization Level = 0 (minimum level) is the same as
// optimization pass 1 today.
// For now, all other optimization levels equate to full
// optimization level today (i.e. optimization pass 2).
//---------------------------------------------------------
}
void RuleSet::enable(NAUnsigned ruleNo, Lng32 fromPass, Lng32 toPassIncl)
{
CMPASSERT(
fromPass >= getFirstPassNumber() AND toPassIncl >= getFirstPassNumber());
if (fromPass <= MAX_NUM_OF_PASSES AND toPassIncl <= MAX_NUM_OF_PASSES)
for (Lng32 i = fromPass; i <= toPassIncl; i++)
*(passNRules_[i]) += ruleNo;
}
void RuleSet::disable(NAUnsigned ruleNo, Lng32 fromPass, Lng32 toPassIncl)
{
CMPASSERT(
fromPass >= getFirstPassNumber() AND toPassIncl >= getFirstPassNumber());
if (fromPass <= MAX_NUM_OF_PASSES AND toPassIncl <= MAX_NUM_OF_PASSES)
for (Lng32 i = fromPass; i <= toPassIncl; i++)
*(passNRules_[i]) -= ruleNo;
}
void RuleSet::initializeAllPasses()
{
Lng32 index;
for (index = 0; index < getFirstPassNumber(); index++)
passNRules_.insertAt(index,NULL);
for (index = getFirstPassNumber(); index <= MAX_NUM_OF_PASSES; index++)
passNRules_.insertAt(index,
new(CmpCommon::contextHeap())
RuleSubset(&allRules_, CmpCommon::contextHeap()));
}
void RuleSet::initializeFirstPass()
{
ruleApplCount_ = 0;
initializeCurrentPassNumber();
oldRules_.clear();
// In the case that optimization_level has been reset dynamically
// via the CONTROL QUERY DEFAULT statement, reset the total pass count.
setTotalPasses();
}
// this is called by the old optimization driver i.e. RelExpr::optimize
// the new optimization driver is method RelExpr::optimize2
NABoolean RuleSet::nextPass()
{
if (NOT inLastPass())
{
incrementCurrentPassNumber();
oldRules_ += *applicableRules();
return TRUE;
}
else
return FALSE;
}
void RuleSet::setCurrentPassNumber(Lng32 passNumber)
{
currentPass_ = passNumber;
oldRules_ += *applicableRules();
}
// -----------------------------------------------------------------------
// methods for class RuleSubstituteMemory
// -----------------------------------------------------------------------
RuleSubstituteMemory::~RuleSubstituteMemory() {}
RelExpr * RuleSubstituteMemory::getNextSubstitute()
{
RelExpr * result;
if (NOT getFirst(result))
result = NULL;
return result;
}
// -----------------------------------------------------------------------
// methods for RulesPerContext
// -----------------------------------------------------------------------
RulesPerContext::RulesPerContext (const Context * const context,CollHeap* h)
: context_(context)
, triedRules_(h)
{
triedRules_.clear(); // start with a clear bitmap
}
// -----------------------------------------------------------------------
// methods for RulesPerContextList
// -----------------------------------------------------------------------
// Get the Rules that have been applied to this context
const RuleSubset & RulesPerContextList::getTriedRules(const Context* const context) const
{
for (Lng32 i= 0; i< (Lng32)entries(); i++)
{
const Context* const other = (*this)[i]->getContext();
if (context == other)
return (*this)[i]->getTriedRules();
}
RuleSubset* emptyRuleSubset =
new(CmpCommon::statementHeap()) RuleSubset(CmpCommon::statementHeap());
emptyRuleSubset->clear();
return *emptyRuleSubset;
}
// Has the provided rule been applied already for the given context?
NABoolean RulesPerContextList::applied (const Context * const context,
NAUnsigned ruleNumber) const
{
Lng32 i = 0;
while (i < (Lng32)entries())
{
const Context* const other = (*this)[i]->getContext();
if (context == other)
return ((*this)[i]->getTriedRules().contains (ruleNumber));
else
i++;
}
return FALSE;
}
// not called anywhere in the code
//
// Has the provided rule been applied in any prior context which has the
// provided set of input logical properties?
NABoolean RulesPerContextList::applied (const EstLogPropSharedPtr& inputLogProp,
NAUnsigned ruleNumber) const
{
Lng32 i = 0;
while (i < (Lng32)entries())
{
if ((*this)[i]->getContext() != NULL AND
(*this)[i]->getContext()->getInputLogProp()->compareEstLogProp(inputLogProp) == SAME)
return ((*this)[i]->getTriedRules().contains (ruleNumber));
else
i++;
}
return FALSE;
}
void RulesPerContextList::addRule (const Context* const context,
NAUnsigned ruleNumber)
{
Lng32 i = 0;
while (i < (Lng32)entries())
{
if (context == (*this)[i]->getContext())
{
// mark this rule as already applied for this context
(*this)[i]->triedRules() += ruleNumber;
return;
}
else
i++;
}
// context not found? Create a new entry for this context.
RulesPerContext *newEntry = new (CmpCommon::statementHeap())
RulesPerContext (context,(CmpCommon::statementHeap()));
newEntry->triedRules() += ruleNumber;
// insert this new structure at the head of the list
insertAt (0, newEntry);
}
// method is not called anywhere
void RulesPerContextList::removeRule (const Context* const context,
NAUnsigned ruleNumber)
{
Lng32 i = 0;
while (i < (Lng32)entries())
{ // is this enough? Do we need to check for the SAME context here?
if (context == (*this)[i]->getContext())
{
// remove this rule from already applied for this context
(*this)[i]->triedRules() -= ruleNumber;
return;
}
else
i++;
}
}
// -----------------------------------------------------------------------
// methods for class Guidance
// -----------------------------------------------------------------------
Guidance::~Guidance() {}
const RuleSubset * Guidance::applicableRules()
{
// default implementation, return all applicable rules
return GlobalRuleSet->applicableRules();
}