blob: fdbb58c517ea77f3430aa20d4c46465d973d83f6 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: TransRule.C
* Description: DBI-defined transformation rules
*
* Created: 9/14/94
* Language: C++
*
*
*
*
******************************************************************************
*/
#include "Sqlcomp.h"
#include "GroupAttr.h"
#include "AppliedStatMan.h"
#include "opt.h"
#include "PhyProp.h"
#include "TransRule.h"
#include "AllRelExpr.h"
#include "RelSample.h"
#include "AllItemExpr.h"
#include "EstLogProp.h"
#include "CmpContext.h"
#include "NormWA.h"
#include "Analyzer.h"
#include "MultiJoin.h"
// -----------------------------------------------------------------------
// Global variables
// -----------------------------------------------------------------------
// Rule numbers of frequently referenced transformation rules.  They are
// captured while CreateTransformationRules() populates the rule set
// (numbers are assigned by insertion order) so that other optimizer
// components can refer to, enable, or disable a specific rule.
THREAD_P NAUnsigned RoutineJoinToTSJRuleNumber;   // RoutineJoin -> TSJ
THREAD_P NAUnsigned JoinToTSJRuleNumber;          // Join -> TSJ
THREAD_P NAUnsigned JoinCommutativityRuleNumber;  // inner join commute
THREAD_P NAUnsigned JoinLeftShiftRuleNumber;      // join left shift
THREAD_P NAUnsigned FilterRule0Number;            // filter on a leaf
THREAD_P NAUnsigned FilterRule1Number;            // filter on a unary op
THREAD_P NAUnsigned FilterRule2Number;            // filter on a binary op
THREAD_P NAUnsigned MJEnumRuleNumber;             // MultiJoin enumeration
THREAD_P NAUnsigned MJStarJoinIRuleNumber;        // MultiJoin star join I
THREAD_P NAUnsigned MJStarJoinIIRuleNumber;       // MultiJoin star join II
THREAD_P NAUnsigned MJExpandRuleNumber;           // MultiJoin expand
THREAD_P NAUnsigned MVQRRewriteRuleNumber;        // MVQR rewrite
// -----------------------------------------------------------------------
// Function to add transformation rules to the rule set
// -----------------------------------------------------------------------
// ---------------------------------------------------------------------
// CreateTransformationRules()
//
// Allocates every DBI-defined transformation rule on the context heap,
// inserts it into 'set', and enables it for the optimization pass(es)
// in which it may fire.  enable() with a pass argument restricts the
// rule to that pass and later; enable() without one enables the rule
// for all passes.
//
// NOTE: rule numbers are assigned by insertion order, so the order of
// the set->insert() calls below is significant.  Selected numbers are
// stashed in the file-level THREAD_P globals for later reference.
//
// Rules created with NULL pattern/substitute trees match and produce
// expressions procedurally in their own topMatch()/nextSubstitute().
// ---------------------------------------------------------------------
void CreateTransformationRules(RuleSet* set)
{
Rule *r;
// MultiJoin star-join heuristic, variant I (pass 1).
r = new (CmpCommon::contextHeap())
MJStarJoinIRule
("MultiJoin StarJoin I Rule",
NULL,
NULL
);
set->insert(r);
set->enable (r->getNumber(),
set->getFirstPassNumber());
MJStarJoinIRuleNumber = r->getNumber();
// MultiJoin star-join heuristic, variant II (pass 1).
r = new (CmpCommon::contextHeap())
MJStarJoinIIRule
("MultiJoin StarJoin II Rule",
NULL,
NULL
);
set->insert(r);
set->enable (r->getNumber(),
set->getFirstPassNumber());
MJStarJoinIIRuleNumber = r->getNumber();
// Materialized-view query rewrite (pass 2).
r = new (CmpCommon::contextHeap())
MVQRRule
("MVQR Rewrite Rule",
NULL,
NULL
);
set->insert(r);
set->enable (r->getNumber(),
set->getSecondPassNumber());
MVQRRewriteRuleNumber = r->getNumber();
// Materialized-view rewrite of a single scan (pass 2).
r = new (CmpCommon::contextHeap())
MVQRScanRule
("MVQR Scan Rewrite Rule",
NULL,
NULL
);
set->insert(r);
set->enable (r->getNumber(),
set->getSecondPassNumber());
// Expand a MultiJoin node into a concrete join tree (pass 2).
r = new (CmpCommon::contextHeap())
MJExpandRule
("MultiJoin Expand Rule",
NULL,
NULL
);
set->insert(r);
set->enable (r->getNumber(),
set->getSecondPassNumber());
MJExpandRuleNumber = r->getNumber();
// Enumerate join orders out of a MultiJoin (pass 2).
r = new (CmpCommon::contextHeap())
MJEnumRule
("MultiJoin Enumeration Rule",
NULL,
NULL
);
set->insert(r);
set->enable (r->getNumber(),
set->getSecondPassNumber());
MJEnumRuleNumber = r->getNumber();
// Join commutativity (pass 2):
//   pattern:    innerJoin(cut0, cut1)
//   substitute: innerJoin(cut1, cut0)
// Applicability is further restricted in JoinCommutativityRule::topMatch().
r = new(CmpCommon::contextHeap())
JoinCommutativityRule ("Join commutativity",
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_NON_TS_INNER_JOIN,
0,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_NON_TS_INNER_JOIN,
0,
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getSecondPassNumber());
JoinCommutativityRuleNumber = r->getNumber();
// Join left shift (pass 2):
//   pattern:    join(join(cut0, cut1), cut2)
//   substitute: join(join(cut0, cut2), Filter(cut1))
// The Filter holds predicates that may need to stay above cut1.
r = new (CmpCommon::contextHeap())
JoinLeftShiftRule
("Left shift rule for inner joins",
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_NON_TSJ_JOIN,
0,
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_NON_TSJ_JOIN,
1,
new (CmpCommon::contextHeap())
CutOp(0,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(2, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_NON_TSJ_JOIN,
1,
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_NON_TSJ_JOIN,
0,
new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
CutOp(2, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Filter(new(CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getSecondPassNumber());
JoinLeftShiftRuleNumber = r->getNumber();
// Turn a scan into an index join, scan -> join(scan, scan) (pass 1).
r = new (CmpCommon::contextHeap())
IndexJoinRule1
("Transform a scan into index joins (pass 1)",
new (CmpCommon::contextHeap())
Scan( REL_SCAN, CmpCommon::contextHeap() ),
new (CmpCommon::contextHeap())
Join(new (CmpCommon::contextHeap())
Scan(REL_SCAN, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Scan(REL_SCAN, CmpCommon::contextHeap()),
REL_JOIN,
NULL,
FALSE,
FALSE,
CmpCommon::contextHeap()));
set->insert(r);
set->enable (r->getNumber(),
set->getFirstPassNumber());
// Same index-join transformation, variant enabled in pass 2.
r = new (CmpCommon::contextHeap())
IndexJoinRule2
("Transform a scan into index joins (pass 2)",
new (CmpCommon::contextHeap())
Scan(REL_SCAN, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Join( new(CmpCommon::contextHeap())
Scan(REL_SCAN, CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
Scan(REL_SCAN, CmpCommon::contextHeap()),
REL_JOIN,
NULL,
FALSE,
FALSE,
CmpCommon::contextHeap()));
set->insert(r);
set->enable (r->getNumber(),
set->getSecondPassNumber());
// OR optimization (pass 2): rewrite a scan with an OR predicate as a
// union of scans, scan -> MapValueIds(Union(scan, scan)).
r = new(CmpCommon::contextHeap())
OrOptimizationRule ("Transform scan to union for OR optimization",
new (CmpCommon::contextHeap())
Scan( REL_SCAN, CmpCommon::contextHeap() ),
new (CmpCommon::contextHeap())
MapValueIds(
new (CmpCommon::contextHeap())
Union(
new(CmpCommon::contextHeap())
Scan(REL_SCAN,
CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
Scan(REL_SCAN,
CmpCommon::contextHeap()),
NULL,
NULL,
REL_UNION,
CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getSecondPassNumber());
// RoutineJoin -> TSJ (all passes):
//   join(cut0, cut1, REL_ROUTINE_JOIN) -> tsj(cut0, Filter(cut1))
r = new(CmpCommon::contextHeap()) RoutineJoinToTSJRule
("RoutineJoin to TSJ Rule",
new(CmpCommon::contextHeap())
Join(new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
REL_ROUTINE_JOIN,
NULL,
FALSE,
FALSE,
CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
Join(new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
Filter (new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
REL_TSJ,
NULL,
FALSE,
FALSE,
CmpCommon::contextHeap() ));
set->insert(r);
set->enable(r->getNumber());
RoutineJoinToTSJRuleNumber = r->getNumber();
// Join -> TSJ:
//   nonTsjJoin(cut0, cut1) -> join(cut0, Filter(cut1))
// (the substitute's REL_JOIN operator is presumably refined by the
// rule's own nextSubstitute() -- confirm in JoinToTSJRule).
r = new (CmpCommon::contextHeap())
JoinToTSJRule
("Transform Join to TSJ",
new (CmpCommon::contextHeap())
WildCardOp (REL_ANY_NON_TSJ_JOIN,
0,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Join (new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Filter (new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
REL_JOIN,
NULL,
FALSE,
FALSE,
CmpCommon::contextHeap()));
set->insert(r);
// xxx need to make this first pass after we remove the special
// code for Anti-semi join and embedded deletes. For now it's already
// enabled in pass 1 at start of optimization unless comp_bool_71 is ON
set->enable (r->getNumber(),
set->getSecondPassNumber());
JoinToTSJRuleNumber = r->getNumber();
// GenericUpdate -> TSJFlow (all passes):
//   unaryGenUpdate(cut0) -> tsjFlow(cut0, leafGenUpdate)
r = new (CmpCommon::contextHeap())
TSJFlowRule
("Transform GenericUpdate to TSJFlow expression",
new (CmpCommon::contextHeap())
WildCardOp
(REL_ANY_UNARY_GEN_UPDATE,
0,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Join (new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp (REL_ANY_LEAF_GEN_UPDATE,
0,
NULL,
NULL,
CmpCommon::contextHeap()),
REL_TSJ_FLOW,
NULL,
FALSE,
FALSE,
CmpCommon::contextHeap()));
set->insert(r);
set->enable (r->getNumber());
// Raj P - 10/2000
// Rule to fire for SPJ CALL statement
// NOTE(review): the substitute tree uses REL_TSJ_FLOW, identical to
// TSJFlowRule above, although the rule name says TSJUDR -- presumably
// TSJUDRRule builds its actual substitute itself; confirm.
r = new (CmpCommon::contextHeap())
TSJUDRRule
("Transform CALL nodes to TSJUDR expression",
new (CmpCommon::contextHeap())
WildCardOp
(REL_ANY_UNARY_GEN_UPDATE,
0,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Join (new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp (REL_ANY_LEAF_GEN_UPDATE,
0,
NULL,
NULL,
CmpCommon::contextHeap()),
REL_TSJ_FLOW,
NULL,
FALSE,
FALSE,
CmpCommon::contextHeap()));
set->insert(r);
set->enable (r->getNumber());
// GenericUpdate -> TSJ (all passes):
//   unaryGenUpdate(cut0) -> tsj(cut0, leafGenUpdate)
r = new (CmpCommon::contextHeap())
TSJRule
("Transform GenericUpdate to TSJ expression",
new (CmpCommon::contextHeap())
WildCardOp
(REL_ANY_UNARY_GEN_UPDATE,
0,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Join (new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp (REL_ANY_LEAF_GEN_UPDATE,
0,
NULL,
NULL,
CmpCommon::contextHeap()),
REL_TSJ,
NULL,
FALSE,
FALSE,
CmpCommon::contextHeap()));
set->insert(r);
set->enable (r->getNumber());
// Filter elimination on a leaf (all passes):
//   Filter(leaf) -> leaf
r = new (CmpCommon::contextHeap())
FilterRule0
("Transform Filter on a leaf",
new (CmpCommon::contextHeap())
Filter(new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_LEAF_OP,
0,
NULL,
NULL,
CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_LEAF_OP,
0,
NULL,
NULL,
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber());
FilterRule0Number = r->getNumber();
// Push a filter below a unary operator (all passes):
//   Filter(unary(cut0)) -> unary(Filter(cut0))
r = new (CmpCommon::contextHeap())
FilterRule1
("Transform Filter on a unary op",
new (CmpCommon::contextHeap())
Filter(new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_UNARY_OP,
0,
new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_UNARY_OP,
0,
new (CmpCommon::contextHeap())
Filter(new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber());
FilterRule1Number = r->getNumber();
// Push a filter below a binary operator (all passes):
//   Filter(binary(cut0, cut1)) -> binary(Filter(cut0), Filter(cut1))
r = new (CmpCommon::contextHeap())
FilterRule2
("Transform Filter on a binary op",
new (CmpCommon::contextHeap())
Filter(new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_BINARY_OP,
0,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_BINARY_OP,
0,
new (CmpCommon::contextHeap())
Filter(new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Filter(new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber());
FilterRule2Number = r->getNumber();
// Remove an unnecessary groupby (pass 1): GroupByAgg(cut0) -> cut0
r = new (CmpCommon::contextHeap())
GroupByEliminationRule
("Eliminate unnecessary groupbys",
new (CmpCommon::contextHeap())
GroupByAgg(new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
REL_GROUPBY,
NULL,
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getFirstPassNumber());
// MVQR rewrite of a groupby (pass 2):
//   group(cut0) -> MapValueIds(cut0)
r = new (CmpCommon::contextHeap())
GroupByMVQRRule
("MVQR GroupBy Rewrite Rule",
// pattern
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_GROUP,
1,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
// substitute
new (CmpCommon::contextHeap())
MapValueIds(new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap())));
set->insert(r);
set->enable (r->getNumber(),
set->getSecondPassNumber());
// Split a groupby into two levels (pass 1):
//   group(cut0) -> MapValueIds(group(GroupByAgg(cut0)))
r = new (CmpCommon::contextHeap())
GroupBySplitRule
("Split a groupby",
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_GROUP,
1,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
MapValueIds(new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_GROUP,
1,
new (CmpCommon::contextHeap())
GroupByAgg( new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
REL_GROUPBY,
NULL,
NULL,
CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getFirstPassNumber());
// Eliminate DISTINCT aggregates by adding a lower groupby (all passes);
// the substitute tree has the same shape as GroupBySplitRule's.
r = new (CmpCommon::contextHeap())
AggrDistinctEliminationRule
("Eliminate aggregate distinct rule",
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_GROUP,
1,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
MapValueIds(new(CmpCommon::contextHeap())
WildCardOp(REL_ANY_GROUP,
1,
new (CmpCommon::contextHeap())
GroupByAgg(new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
REL_GROUPBY,
NULL,
NULL,
CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber());
// Three-level split of a partial groupby that is a leaf (pass 2);
// again the same pattern/substitute shape as GroupBySplitRule.
r = new (CmpCommon::contextHeap())
GroupByTernarySplitRule
("Split a partial groupby that is a leaf",
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_GROUP,
1,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
MapValueIds(new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_GROUP,
1,
new (CmpCommon::contextHeap())
GroupByAgg(new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
REL_GROUPBY,
NULL,
NULL,
CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getSecondPassNumber());
// Push a groupby below an inner join (pass 2):
//   GroupByAgg(join(cut0, cut1)) -> join(GroupByAgg(cut0), cut1)
r = new (CmpCommon::contextHeap())
GroupByOnJoinRule
("Push groupby down past a join",
new (CmpCommon::contextHeap())
GroupByAgg(new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_INNER_JOIN,
1,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
REL_GROUPBY,
NULL,
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_INNER_JOIN,
1,
new (CmpCommon::contextHeap())
GroupByAgg(new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
REL_GROUPBY,
NULL,
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getSecondPassNumber());
// Push a partial groupby below a TSJ (pass 2); same shape as above
// but with an REL_ANY_TSJ wildcard.
r = new (CmpCommon::contextHeap())
PartialGroupByOnTSJRule
("Push partial groupby down past a tsj",
new (CmpCommon::contextHeap())
GroupByAgg(new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_TSJ,
1,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
REL_GROUPBY,
NULL,
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_TSJ,
1,
new (CmpCommon::contextHeap())
GroupByAgg(new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
REL_GROUPBY,
NULL,
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getSecondPassNumber());
// Short-circuit groupby (pass 2): anytrue-subquery or min/max
// aggregate groupby -> ShortCutGroupBy, which can stop early.
r = new(CmpCommon::contextHeap()) ShortCutGroupByRule
("Transform anytrue subquery groupby or min/max aggr to shortcut groupby",
new(CmpCommon::contextHeap())
WildCardOp(REL_ANY_GROUP,
0,
new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
NULL,
CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
ShortCutGroupBy(new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
REL_SHORTCUT_GROUPBY,
NULL,
NULL,
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getSecondPassNumber());
// Strip leftover CommonSubExprRef nodes (all passes):
//   CommonSubExprRef(cut0) -> cut0
r = new(CmpCommon::contextHeap()) CommonSubExprRule
("Eliminate any CommonSubExpr nodes left from the normalizer - for now",
new(CmpCommon::contextHeap())
CommonSubExprRef(new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
"",
CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber());
// Fold a RelSample over a scan into the scan itself (all passes):
//   RelSample(Scan) -> Scan
r = new (CmpCommon::contextHeap()) SampleScanRule
("Transform RelSample above a Scan",
new (CmpCommon::contextHeap())
RelSample(new(CmpCommon::contextHeap())
Scan(REL_SCAN, CmpCommon::contextHeap()),
RelSample::ANY,
NULL,
NULL,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
Scan(REL_SCAN, CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber());
//++MV,
// Rotate a left-deep inner-join pair into a right-deep one (pass 2):
//   join(join(cut0, cut1), cut2) -> join(cut0, join(cut1, cut2))
r = new(CmpCommon::contextHeap())
JoinToBushyTreeRule("Join to bush tree",
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_INNER_JOIN,
0,
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_INNER_JOIN,
1,
new (CmpCommon::contextHeap())
CutOp(0,
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
CutOp(2, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_INNER_JOIN,
0,
new (CmpCommon::contextHeap())
CutOp(0, CmpCommon::contextHeap()),
new (CmpCommon::contextHeap())
WildCardOp(REL_ANY_INNER_JOIN,
1,
new(CmpCommon::contextHeap())
CutOp(1, CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
CutOp(2, CmpCommon::contextHeap()),
CmpCommon::contextHeap()),
CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber(),
set->getSecondPassNumber());
/*
r = new(CmpCommon::contextHeap()) HbaseScanRule
("Hbase Scan transformation rule",
new(CmpCommon::contextHeap()) Scan(REL_SCAN, CmpCommon::contextHeap()),
new(CmpCommon::contextHeap())
HbaseAccess(CmpCommon::contextHeap()));
set->insert(r);
set->enable(r->getNumber());
*/
//*****
//--MV
}
// -----------------------------------------------------------------------
// methods for transformation rules
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// methods for JoinCommutativityRule
// -----------------------------------------------------------------------
//------------------------------------------------------------------------
// JoinCommutativityRule::topMatch()
//
// Decides whether join commutativity may be considered for 'expr'.
// Returns TRUE only for inner non-semi joins whose shape is not pinned
// by the user, CONTROL QUERY SHAPE, MV inlining, or streams/embedded
// updates, and — beyond the simple two-table case — only when zigzag
// trees are allowed by the relevant heuristics.
//------------------------------------------------------------------------
NABoolean JoinCommutativityRule::topMatch (RelExpr * expr,
                                           Context * context)
{
  // Bit #1 of the COMP_INT_77 rule-guidance bitmap disables this rule.
  if (CURRSTMT_OPTDEFAULTS->isRuleDisabled(1))
    return FALSE;

  // A user-forced join order means there is nothing for us to do here.
  if (CURRSTMT_OPTDEFAULTS->joinOrderByUser())
    return FALSE;

  // A successfully applied CONTROL QUERY SHAPE also fixes the shape.
  if (CURRSTMT_CQSWA && CURRSTMT_CQSWA->reArrangementSuccessful_)
    return FALSE;

  // ++MV: a join driving an MV log insert must keep its shape until
  // JoinToBushyTreeRule has fired, and inlining may pin the order too.
  if (expr->getInliningInfo().isDrivingMvLogInsert() ||
      expr->getInliningInfo().isJoinOrderForcedByInlining())
    return FALSE;

  // General pattern match; this also rejects semi, anti-semi and left
  // joins, which do not fit the REL_ANY_NON_TS_INNER_JOIN wildcard.
  if (!Rule::topMatch(expr, context))
    return FALSE;

  Join *join = (Join *) expr;

  // QSTUFF: streams and embedded updates/deletes only ever appear on
  // the left, and a join with such a left child must not be commuted.
  CMPASSERT(!join->child(1).getGroupAttr()->isStream());
  CMPASSERT(!join->child(1).getGroupAttr()->isEmbeddedUpdateOrDelete());
  if (join->child(0).getGroupAttr()->isStream() ||
      join->child(0).getGroupAttr()->isEmbeddedUpdateOrDelete())
    return FALSE;

  // Only inner non-semi joins commute.
  if (!join->isInnerNonSemiJoin())
    return FALSE;

  // With exactly two joined tables the commuted form is always useful.
  // The children are cut nodes, so consult the group's joined-table
  // count rather than inspecting the child operators directly.
  if (expr->getGroupAttr()->getNumJoinedTables() == 2)
    return TRUE;

  // More than two tables: commuting makes a zigzag tree.  Skip it when
  // zigzags are off, unless this join came from the prime-table rule.
  if (CURRSTMT_OPTDEFAULTS->isZigZagTreeConsidered() == DF_OFF &&
      !join->isJoinFromPTRule())
    return FALSE;

  // Prime-table-rule joins get all their zigzag variations regardless
  // of the zigzag CQD, but only for subtrees of at most 16 children.
  if (join->isJoinFromPTRule() &&
      expr->getGroupAttr()->getNumJoinedTables() > 16)
    return FALSE;
  else if (CURRSTMT_OPTDEFAULTS->isZigZagTreeConsidered() == DF_SYSTEM)
  {
    //----------------ZZT control------------------//
    // Commute only when the left side is smaller (by max-cardinality
    // bytes) than the right side's expected size.
    CostScalar leftMaxBytes =
      expr->child(0).getGroupAttr()->getResultMaxCardinalityForEmptyInput()
      * expr->child(0).getGroupAttr()->getRecordLength();
    CostScalar rightBytes =
      expr->child(1).getGroupAttr()->getResultCardinalityForEmptyInput()
      * expr->child(1).getGroupAttr()->getRecordLength();
    if (leftMaxBytes >= rightBytes)
      return FALSE;

    // If the right side can fit in memory anyway, swapping the hash
    // join's children buys little.
    CostScalar rightMaxBytes =
      expr->child(1).getGroupAttr()->getResultMaxCardinalityForEmptyInput()
      * expr->child(1).getGroupAttr()->getRecordLength();
    if (rightMaxBytes <
        CostScalar(1024 * CURRSTMT_OPTDEFAULTS->getMemoryLimitPerCPU()))
      return FALSE;

    // We could be more aggressive and require
    //   leftBytes < 1024 * memoryLimitPerCPU < rightBytes
    // before allowing a zigzag, and we might also want to account for
    // ordered hash joins (zigzag was originally proposed for hybrid
    // hash joins only).
  }
  return TRUE;
}
//------------------------------------------------------------------------
// JoinCommutativityRule::nextSubstitute()
//
// Produces the commuted join: the default substitute with its two
// children swapped.  Marks the substitute so that re-applying this rule
// (which would merely recreate 'before') is suppressed, and freezes
// zigzag variations against any further transformation.
//------------------------------------------------------------------------
RelExpr * JoinCommutativityRule::nextSubstitute(RelExpr * before,
                                                Context *,
                                                RuleSubstituteMemory * &memory)
{
  Join *beforeJoin = (Join *) before;
  Join *flipped    = (Join *) Rule::nextSubstitute(before, NULL, memory);

  // Never apply commutativity to the substitute itself -- that would
  // just give the original expression back.  (Guidance could enforce
  // this too, but guidance is active only during a group's first
  // optimization, so a second optimization -- e.g. for another required
  // order -- would re-fire the rule needlessly.)
  flipped->contextInsensRules() += getNumber();

  // Swap the two children of the substitute.
  flipped->flipChildren();

  if (before->getGroupAttr()->getNumJoinedTables() != 2)
  {
    // This flip produced a zigzag shape.  Freeze it: no further
    // transformation rules (including TSJ generation) may fire on it.
    flipped->contextInsensRules() += GlobalRuleSet->transformationRules();
    flipped->setTransformComplete();

    // Remember that this is an optional zigzag join.
    flipped->setJoinForZigZag();

    // When one child has at most one match in the other, the two merge
    // join orientations are symmetric in the executor, so also disable
    // the merge join rule for this substitute.
    if (flipped->eitherChildHasUniqueMatch())
      flipped->contextInsensRules() += MergeJoinRuleNumber;
  }

  // Propagate prime-table-rule lineage (direct or indirect) so later
  // heuristics treat the flipped join the same way.
  if (beforeJoin->isJoinFromPTRule())
    flipped->setJoinFromPTRule();

  flipped->setPotential(before->getPotential());
  return flipped;
}
// -----------------------------------------------------------------------
// methods for JoinLeftShiftRule
// -----------------------------------------------------------------------
//------------------------------------------------------------------------
// JoinLeftShiftRule::topMatch()
//
// Decides whether the left-shift rule may be considered for 'expr'.
// Requires that the join order is not pinned (user, MultiJoin rewrite,
// CONTROL QUERY SHAPE, MV inlining), that the join is not anti-semi or
// a generic-update root, and that the left input group really contains
// an inner join of at least two tables to shift.
//------------------------------------------------------------------------
NABoolean JoinLeftShiftRule::topMatch (RelExpr * expr,
                                       Context * context)
{
  // Bit #2 of the COMP_INT_77 rule-guidance bitmap disables this rule.
  if (CURRSTMT_OPTDEFAULTS->isRuleDisabled(2))
    return FALSE;

  // A user-forced join order means there is nothing for us to do here.
  if (CURRSTMT_OPTDEFAULTS->joinOrderByUser())
    return FALSE;

  // For now, left shift is disabled whenever MultiJoin rewrite is used;
  // MultiJoin enumeration covers the reordering itself.
  if (QueryAnalysis::Instance() && QueryAnalysis::Instance()->multiJoinsUsed())
    return FALSE;

  // A successfully applied CONTROL QUERY SHAPE fixes the tree shape.
  if (CURRSTMT_CQSWA && CURRSTMT_CQSWA->reArrangementSuccessful_)
    return FALSE;

  // ++MV: don't reorder a join driving an MV log insert before
  // JoinToBushyTreeRule has fired.
  if (expr->getInliningInfo().isDrivingMvLogInsert())
    return FALSE;

  // General pattern match against the rule's pattern tree.
  if (!Rule::topMatch(expr, context))
    return FALSE;

  Join *join = (Join *) expr;

  // QSTUFF: streams and embedded updates/deletes never sit on child(1).
  CMPASSERT(!join->child(1).getGroupAttr()->isStream());
  CMPASSERT(!join->child(1).getGroupAttr()->isEmbeddedUpdateOrDelete());

  // Anti-semi joins cannot be left-shifted.
  if (join->isAntiSemiJoin())
    return FALSE;

  // QSTUFF: generic-update roots must not be moved.
  if (join->getGroupAttr()->isGenericUpdateRoot() ||
      join->child(0).getGroupAttr()->isGenericUpdateRoot())
    return FALSE;

  // Finally, the left input group must contain a logical expression
  // that is an inner join of at least two tables.
  return (join->child(0).getGroupAttr()->getNumJoinedTables() >= 2);
}
RelExpr * JoinLeftShiftRule::nextSubstitute(RelExpr * before,
Context * context/*context*/,
RuleSubstituteMemory * & memory)
{
// Check to see whether any transformations should be applied on this
// join child.
if (((Join *)(before->child(0).getPtr()))->isTransformComplete()) return NULL;
// do the default pattern substitution logic
Join *result = (Join *)Rule::nextSubstitute(before,NULL,memory);
Join *joinChild = (Join *)result->child(0).getPtr();
// Move all selection predicates to the new top join
result->selectionPred() += joinChild->selectionPred();
joinChild->selectionPred().clear();
// Allocate a new Group Attributes for the child.
joinChild->setGroupAttr(new (CmpCommon::statementHeap()) GroupAttributes);
// Compute the set of values that each child will potentially require
// for evaluating expressions. Also compute the set of values that
// the child is capable of producing as output.
// This call will NOT effect the characteristic input and output
// values of CutOps.
result->allocateAndPrimeGroupAttributes();
// If the right child does not have any predicates on its filter,
// eliminate the filter.
Filter * filterPtr = (Filter *)result->child(1).getPtr();
// For the case where joinChild is a semiJoin, leftJoin or an
// antiSemiJoin make sure that the joinPred does not reference
// anything from result->child(1)
// i.e. joinPred is covered by joinChild inputs and its childs outputs
if (NOT joinChild->getJoinPred().isEmpty())
{
ValueIdSet charInputs =
joinChild->getGroupAttr()->getCharacteristicInputs();
ValueIdSet child0Outputs =
joinChild->child(0)->getGroupAttr()->getCharacteristicOutputs();
ValueIdSet availableExprs = charInputs;
availableExprs += child0Outputs;
availableExprs +=
joinChild->child(1)->getGroupAttr()->getCharacteristicOutputs();
ValueIdSet joinPredicates = joinChild->getJoinPred();
ValueIdSet vegPredicates;
// Separate the vegPredicates from the rest
joinPredicates.lookForVEGPredicates(vegPredicates);
joinPredicates -= vegPredicates;
// make sure we can cover the join predicates
joinPredicates.removeCoveredExprs(availableExprs);
// The Veg predicates must be covered by both children.
// We know that joinChild->child(1) covers them, verify
// that the other child also covers them.
// We used to call removeCoveredExprs on the vegPredicates, just
// like we do above for the join predicates. But, the method
// removeCoveredExprs doesn't look for coverage of the vegPredicates,
// it looks for coverage of their underlying vegRefs. The coverage
// code for vegRefs returns TRUE if any component of the vegRef is
// is "covered", including a constant. We don't want to say a
// vegPredicate is "covered" if it references a constant, we only
// want to say it is "covered" if it references a characteristic
// output of child0. Otherwise, we can erroneously think that this
// left shift is OK in some cases and we can then get wrong answers.
// For example:
// SELECT t0.i1 from j3 t0, j1 t2 left join j3 t1 on t2.i1 = 1;
// If the initial join order is t0,t2,t1, we don't want to allow
// t1 to be left shifted with t2. But removeCoveredExpr would think
// that the vegPredicate "t2.i1 = 1" was "covered" by child0 (t0),
// because the constant "1" is always "covered". But this is wrong.
// It turns out that VEGPredicate::isCovered does exactly what needs
// to be done, so we now call it instead.
ValueIdSet dummyReferencedInputs,dummyCoveredExpr,dummyUncoveredExpr;
GroupAttributes emptyGA;
emptyGA.setCharacteristicOutputs(child0Outputs);
NABoolean vegPredsCovered =
vegPredicates.isCovered(charInputs,
emptyGA,
dummyReferencedInputs,
dummyCoveredExpr,
dummyUncoveredExpr);
if (NOT vegPredsCovered OR
NOT joinPredicates.isEmpty())
{
joinChild->deleteInstance();
result->deleteInstance();
filterPtr->deleteInstance();
return NULL;
}
}
// Push down as many full or partial expressions that can be
// computed by the children. Recompute the Group Attributes of
// each child that is not a CutOp.
result->pushdownCoveredExpr
(result->getGroupAttr()->getCharacteristicOutputs(),
result->getGroupAttr()->getCharacteristicInputs(),
result->selectionPred());
if (filterPtr->selectionPred().isEmpty())
{
result->child(1) = filterPtr->child(0).getPtr();
filterPtr->deleteInstance(); // delete the Filter
filterPtr=NULL;
}
else
{
// Check to see if the filter node is adding any new characteristic
// inputs (i.e. outer references). If not, no true join predicates were
// pushed down. So, eliminate this filter.
if( (filterPtr->getGroupAttr()->getCharacteristicInputs() ==
filterPtr->child(0).getGroupAttr()->getCharacteristicInputs()) AND
(filterPtr->getGroupAttr()->getCharacteristicOutputs() ==
filterPtr->child(0).getGroupAttr()->getCharacteristicOutputs()) )
{
result->child(1) = filterPtr->child(0).getPtr();
filterPtr->deleteInstance();
filterPtr=NULL;
}
else
{
filterPtr->synthLogProp();
filterPtr->getGroupAttr()->addToAvailableBtreeIndexes(
filterPtr->child(0).getGroupAttr()->getAvailableBtreeIndexes());
}
}
// Call pushdownCovered expressions on the joinChild
// Since the children of the joinChild are Cut operators, nothing
// will be pushed to them but predicates will be removed from
// the joinChild if it is determined that they were given (givable)
// to the cut operators before.
joinChild->pushdownCoveredExpr
(joinChild->getGroupAttr()->getCharacteristicOutputs(),
joinChild->getGroupAttr()->getCharacteristicInputs(),
joinChild->selectionPred());
// synthesize the estimated logical properties for the new node
joinChild->synthLogProp();
// Call synthLogProp on the result also so that we can call
// findEquiJoinPredicates() with the new set of predicates
result->synthLogProp();
//----------------Heuristics Domain-------------------------------
//
// r3 / r3 /
// / /
// Join Join
// r1 / \ rB r2 / \ rA
// / \ / \
// Join ==> Join
// r0 / \ rA r0 / \ rB
// / \ / \
//
CostScalar r0 =
joinChild->child(0).getGroupAttr()->getResultCardinalityForEmptyInput();
CostScalar r1 =
before->child(0).getGroupAttr()->getResultCardinalityForEmptyInput();
CostScalar r2 =
joinChild->getGroupAttr()->getResultCardinalityForEmptyInput();
CostScalar rA =
result->child(1).getGroupAttr()->getResultCardinalityForEmptyInput();
CostScalar rB =
joinChild->child(1).getGroupAttr()->getResultCardinalityForEmptyInput();
CostScalar r3 =
before->getGroupAttr()->getResultCardinalityForEmptyInput();
CostScalar s0 = r0 * joinChild->child(0).getGroupAttr()->getRecordLength();
CostScalar s1 = r1 * before->child(0).getGroupAttr()->getRecordLength();
CostScalar s2 = r2 * joinChild->getGroupAttr()->getRecordLength();
CostScalar sA = rA * result->child(1).getGroupAttr()->getRecordLength();
CostScalar sB = rB * joinChild->child(1).getGroupAttr()->getRecordLength();
CostScalar s3 = r3 * before->getGroupAttr()->getRecordLength();
//-------------------TSJ control----------------------------------
// Nested join control as coded does not work. Causes bug #129
// (Genesis case #10-000222-6834). Commenting out. Code also
// makes no sense.
/*
if (CURRSTMT_OPTDEFAULTS->isNestedJoinControlEnabled())
{
ValueIdSet beforePred = before->selectionPred();
beforePred += ((Join*)before)->joinPred();
ValueIdSet beforeChildPred = (before->child(0).getPtr())->selectionPred();
beforeChildPred += ((Join*)(before->child(0).getPtr()))->joinPred();
ValueIdSet resultPred = result->selectionPred();
resultPred += result->joinPred();
ValueIdSet resultChildPred = joinChild->selectionPred();
resultChildPred += joinChild->joinPred();
if (beforePred == resultChildPred)
{
if(r0 >= r1)
{joinChild->doNotTransformToTSJ();}
else
{((Join*)before)->doNotTransformToTSJ();}
}
if (resultPred == beforeChildPred)
{
if(r0 <= r2)
{result->doNotTransformToTSJ();}
else
{((Join*)(before->child(0).getPtr()))->doNotTransformToTSJ();}
}
}
*/
//-------------------------------------------------------------
//------------- Minimum Flow Heuristic -------------
if (CURRSTMT_OPTDEFAULTS->optimizerHeuristic5())
{
{
// This part of the flow did not change.
CostScalar constFlow = s0 + s3 + sA + sB;
CostScalar fudgeFactor = 1.5;
if( (constFlow+s2) >= (fudgeFactor*(constFlow+s1)+1000) )
// maybe also add (AND r2 > r1 * fudg factor) cuz in some cases
// row count could be even more important than size e.g TSJ (#probes)
{
result->contextInsensRules() += GlobalRuleSet->implementationRules();
}
else if( (constFlow+s1) > (fudgeFactor*(constFlow+s2)+1000) )
{
((Join*)before)->contextInsensRules() += GlobalRuleSet->implementationRules();
}
}
}
//------------------------------------------------------------
//-------------------X product reduction----------------------
// allow cross product control if:
// 1) cross_product_control is active AND
// 2) multijoin has >1 base table
NABoolean allowCrossProductControl =
(CURRSTMT_OPTDEFAULTS->isCrossProductControlEnabled()) AND
(before->getGroupAttr()->getGroupAnalysis()->
getAllSubtreeTables().entries() > 1);
if (allowCrossProductControl)
{
if (NOT ((Join*)(before->child(0).getPtr()))->isCrossProduct()
AND joinChild->isCrossProduct()
AND NOT ((Join*)before)->isCrossProduct())
{
if (r2 > CostScalar(10) * r1)
{
result->contextInsensRules() += GlobalRuleSet->transformationRules();
result->contextInsensRules() += GlobalRuleSet->implementationRules();
joinChild->contextInsensRules() += GlobalRuleSet->transformationRules();
joinChild->contextInsensRules() += GlobalRuleSet->implementationRules();
}
}
if (result->isCrossProduct()
AND joinChild->isCrossProduct())
{
if (r2 >= r1)
{
result->contextInsensRules() += GlobalRuleSet->implementationRules();
}
}
}
//------------------------------------------------------------------
// don't try to apply the left shift rule on the result, it would
// just give the original expression back
result->contextInsensRules() += getNumber();
return (RelExpr *)result;
}
// After a left shift has fired, restrict exploration of the child:
// only allow the join-to-TSJ transformation there, and only once.
Guidance * JoinLeftShiftRule::guidanceForExploringChild(Guidance *,
                                                        Context *,
                                                        Lng32)
{
  return new (CmpCommon::statementHeap())
    OnceGuidance(JoinToTSJRuleNumber, CmpCommon::statementHeap());
}
// While exploring the substitute, allow this very rule to fire at
// most once more (prevents ping-ponging back to the original shape).
Guidance * JoinLeftShiftRule::guidanceForExploringSubstitute(Guidance *)
{
  return new (CmpCommon::statementHeap())
    OnceGuidance(getNumber(), CmpCommon::statementHeap());
}
// Same restriction during optimization of the substitute: the left
// shift rule itself may be applied only once more.
Guidance * JoinLeftShiftRule::guidanceForOptimizingSubstitute(Guidance *,
                                                              Context *)
{
  return new (CmpCommon::statementHeap())
    OnceGuidance(getNumber(), CmpCommon::statementHeap());
}
// -----------------------------------------------------------------------
// methods for IndexJoinRule1
// -----------------------------------------------------------------------

// Match a Scan that has usable indexes and has not yet been rewritten
// into an index join (pass 1 allows a single index join per scan).
NABoolean IndexJoinRule1::topMatch (RelExpr * expr,
                                    Context * context)
{
  // generic rule match must succeed first
  if (NOT Rule::topMatch(expr, context))
    return FALSE;

  Scan * scan = (Scan *) expr;

  // index joins on sample scans are currently disabled
  if (scan->isSampleScan() == TRUE)
    return FALSE;

  // go through all indexes and find out in which way they could
  // be used, if this is not already done
  scan->addIndexInfo();

  // in pass 1, consider only one index join
  if (scan->getNumIndexJoins() != 0)
    return FALSE;
  return (scan->getIndexInfo().entries() > 0);
}
// Pass-1 entry point: delegate to the shared worker, passing the
// first optimization pass number.
RelExpr * IndexJoinRule1::nextSubstitute(
     RelExpr * before,
     Context * /*context*/,
     RuleSubstituteMemory * & memory)
{
  return nextSubstituteForPass(before, memory,
                               RuleSet::getFirstPassNumber());
}
// Produce index-join substitutes for a Scan, one per call.
// On the first call (memory == NULL) all possible index joins are
// precomputed from the scan's ScanIndexInfo list and stored in the
// substitute memory; each subsequent call hands out one stored
// substitute until the memory is exhausted, at which point the memory
// is deleted and NULL is returned.
RelExpr * IndexJoinRule1::nextSubstituteForPass(
     RelExpr * before,
     RuleSubstituteMemory * & memory,
     Lng32 /*pass*/)
{
  RelExpr *result;

  if (memory == NULL)
    {
      // -----------------------------------------------------------------
      // this is the first call, create info on all indexes and create
      // index joins for pass 1
      // -----------------------------------------------------------------

      // allocate a new memory for multiple substitutes
      memory = new (CmpCommon::statementHeap())
        RuleSubstituteMemory(CmpCommon::statementHeap());

      assert(before->getOperatorType() == REL_SCAN);

      // cast the before expression to a scan
      Scan * bef = (Scan *) before;
      CollIndex numIndexJoins = bef->getPossibleIndexJoins().entries();

      // With the range-spec transformation enabled, rebuild the
      // selection predicates in their pre-rangespec ("old tree") form
      // so they can be compared against the precomputed index
      // predicates below.
      ValueIdSet selectionpreds;
      if ((CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON ) &&
          (bef->selectionPred().entries()))
      {
        ValueIdList selectionPredList(bef->selectionPred());
        ItemExpr *inputItemExprTree = selectionPredList.rebuildExprTree(ITM_AND,FALSE,FALSE);
        ItemExpr * resultOld = revertBackToOldTree(CmpCommon::statementHeap(), inputItemExprTree);
        resultOld->convertToValueIdSet(selectionpreds, NULL, ITM_AND);
        doNotReplaceAnItemExpressionForLikePredicates(resultOld,selectionpreds,resultOld);
        // ValueIdSet resultSet;
        // revertBackToOldTreeUsingValueIdSet(bef->selectionPred(), resultSet);
        // ItemExpr* resultOld = resultSet.rebuildExprTree(ITM_AND,FALSE,FALSE);
        // selectionpreds += resultSet;
        // doNotReplaceAnItemExpressionForLikePredicates(resultOld,selectionpreds,resultOld);
      }
      else
        selectionpreds = bef->selectionPred();

      for (CollIndex i = 0; i < numIndexJoins; i++)
        {
          // get to the recipe on how to build the index join
          ScanIndexInfo *ixi = bef->getPossibleIndexJoins()[i];

          // QSTUFF VV
          // for streams we must check whether all predicates are covered by index
          // and allcharacteristic outputs are produced by index. In case of an
          // embedded update/delete we must use the characteristic outputs of the
          // generic update root. All this ensures that updates to columns covered
          // by predicates or seen by user cause stream to be rescheduled.
          if (bef->getGroupAttr()->isStream())
          {
            if (ixi->indexPredicates_.contains(selectionpreds)){
              ValueIdSet outputs =
                (bef->getGroupAttr()->isEmbeddedUpdate() ?
                 bef->getGroupAttr()->getGenericUpdateRootOutputs() :
                 bef->getGroupAttr()->getCharacteristicOutputs());

              // the output value check is only required for embedded
              // updates since a delete always touches all indexes
              if (ixi->outputsFromIndex_.contains(outputs) ||
                  (bef->getGroupAttr()->isEmbeddedDelete())) {
                memory->insert(makeSubstituteFromIndexInfo(bef,ixi));
              }
              else {
                // index does not produce the required outputs:
                // raise diagnostic 4207 instead of a substitute
                *CmpCommon::diags() << DgSqlCode(4207)
                                    << DgTableName(ixi->usableIndexes_[0]->getIndexDesc()->
                                                   getNAFileSet()->getExtFileSetName());
              }
            }
            else{
              // index does not cover all stream predicates:
              // raise diagnostic 4208 instead of a substitute
              *CmpCommon::diags() << DgSqlCode(4208)
                                  << DgTableName(ixi->usableIndexes_[0]->getIndexDesc()->
                                                 getNAFileSet()->getExtFileSetName());
            }
          }
          else
            // QSTUFF
            // insert the index join into the substitute memory
            memory->insert(makeSubstituteFromIndexInfo(bef,ixi));
        } // for each precomputed index join
    } // memory == NULL

  // ---------------------------------------------------------------------
  // handle case of multiple substitutes
  // ---------------------------------------------------------------------
  if (memory != NULL)
    {
      result = memory->getNextSubstitute();

      if (result == NULL)
        {
          // returned all the substitutes
          // now delete the substitute memory, so we won't be called again
          delete memory;
          memory = NULL;
        }

      // return the next retrieved substitute
      return result;
    }
  else
    return NULL; // rule didn't fire
}
// Build one index-join substitute for scan <bef> from the precomputed
// recipe <ixi>: a TSJ whose left child scans the alternate index
// (index-only) and whose right child scans the base table's
// clustering index, joined on <ixi>'s join predicates.
RelExpr * IndexJoinRule1::makeSubstituteFromIndexInfo(Scan *bef,
                                                      ScanIndexInfo *ixi)
{ // the substitute is a join between two scan nodes for the same table
  Scan *leftScan = new (CmpCommon::statementHeap())
    Scan(bef->getTableName(),bef->getTableDesc());
  leftScan->setForceIndexInfo();

  Scan *rightScan = new (CmpCommon::statementHeap())
    Scan(bef->getTableName(),bef->getTableDesc());
  rightScan->setForceIndexInfo();
  rightScan->setSuppressHints();

  // propagate SqlTableOpen information pointers
  leftScan->setOptStoi(bef->getOptStoi());
  rightScan->setOptStoi(bef->getOptStoi());

  // hash and merge joins make not much sense for index joins, exclude them
  Join *subs = new (CmpCommon::statementHeap())
    Join(leftScan,rightScan,REL_TSJ);
  subs->setGroupAttr(bef->getGroupAttr());

  // Mark it as an indexJoin
  subs->setIsIndexJoin();

  // propagate access options
  leftScan->accessOptions() = bef->accessOptions();
  rightScan->accessOptions() = bef->accessOptions();

  // the char. outputs of the left and right scan are already
  // precomputed
  leftScan->setPotentialOutputValues(ixi->outputsFromIndex_);
  rightScan->setPotentialOutputValues(ixi->outputsFromRightScan_);

  // the index predicates go to the left scan
  leftScan->selectionPred() += ixi->indexPredicates_;
  leftScan->setComputedPredicates(bef->getComputedPredicates());

  // With the range-spec transformation enabled, compute the reverted
  // ("old tree") form of the scan's selection predicates; that form
  // is what goes to the right child below.
  ValueIdSet selectionpreds;
  if ((CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON ) &&
      (bef->selectionPred().entries()))
  {
    ValueIdList selectionPredList(bef->selectionPred());
    ItemExpr *inputItemExprTree = selectionPredList.rebuildExprTree(ITM_AND,FALSE,FALSE);
    ItemExpr * resultOld = revertBackToOldTree(CmpCommon::statementHeap(), inputItemExprTree);
    resultOld->convertToValueIdSet(selectionpreds, NULL, ITM_AND);
    doNotReplaceAnItemExpressionForLikePredicates(resultOld,selectionpreds,resultOld);
    // ValueIdSet resultSet;
    // revertBackToOldTreeUsingValueIdSet(bef->selectionPred(), resultSet);
    // ItemExpr* resultOld = resultSet.rebuildExprTree(ITM_AND,FALSE,FALSE);
    // selectionpreds += resultSet;
    // doNotReplaceAnItemExpressionForLikePredicates(resultOld,selectionpreds,resultOld);
  }

  // NOTE(review): this re-tests the same RANGESPEC condition that was
  // evaluated just above; the two branches look foldable — confirm.
  if(CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON &&
     (bef->selectionPred().entries()))
  {
    rightScan->selectionPred() += selectionpreds;
  }
  else
    // all other predicates go to the right child
    rightScan->selectionPred() += bef->selectionPred();

  rightScan->selectionPred() += ixi->joinPredicates_;

  // ---------------------------------------------------------------------
  // Bugfix: soln # 10-020930-2072
  // The selPred of the leftScan should be subtracted form the rightScan
  // selPred only if there is not uncovered part in the former.
  // ---------------------------------------------------------------------
  GroupAttributes alwaysCoveredGA;
  ValueIdSet dummyReferencedInputs, alwaysCovered,anyUnCoveredExpr;

  // For coverage check for the characteristics inputs, as well as the
  // index columns
  ValueIdSet availableColumns;
  leftScan->getPotentialOutputValues(availableColumns);
  availableColumns += ixi->indexColumns_;
  availableColumns += bef->getGroupAttr()->getCharacteristicInputs();

  (leftScan->selectionPred()).isCovered(
       availableColumns,
       alwaysCoveredGA,
       dummyReferencedInputs,
       alwaysCovered,
       anyUnCoveredExpr);

  // NOTE(review): a ValueIdSet compared against NULL — presumably this
  // means "no uncovered expressions remain" (i.e. isEmpty()); confirm
  // the conversion semantics before touching this line.
  if(anyUnCoveredExpr==NULL)
    rightScan->selectionPred() -= leftScan->selectionPred();

  // ******************************************************************
  // 10-040303-3776: if a predicate factor is covered by the left scan
  // (index scan), no need to re-calculate it on the right child
  // ******************************************************************
  rightScan->selectionPred() -= alwaysCovered;

  // a copy of the join predicates is left on the TSJ, or else
  // the join predicates are executed on the right child alone
  subs->selectionPred() = ixi->joinPredicates_;

  // we also know which indexes the left scan should consider,
  // and we know that all of them are indexOnly for that scan
  leftScan->setIndexOnlyScans(ixi->usableIndexes_);

  // Only consider the primary access path for the right table!
  // We used to consider any index which can supply the remaining
  // values we need, but then the index join will most likely have to
  // scan the entire right child for every probe, since the alternate
  // index's key is not the primary key columns, and the join predicates
  // will be on the primary key columns.
  // setIndexOnlyScans demands a set of index descriptors, even if we
  // have only one!
  SET(IndexProperty *) primaryIndex(CmpCommon::statementHeap());
  IndexProperty * ixProp = new(CmpCommon::statementHeap()) IndexProperty(
       (IndexDesc *)bef->getTableDesc()->getClusteringIndex());
  primaryIndex.insert(ixProp);
  rightScan->setIndexOnlyScans(primaryIndex);

  // set the number of index joins already done for the right scan
  rightScan->setNumIndexJoins(bef->getNumIndexJoins() + 1);

  // now do the standard thing one should do with a substitute
  subs->allocateAndPrimeGroupAttributes();
  leftScan->getGroupAttr()->addCharacteristicInputs(ixi->inputsToIndex_);
  subs->pushdownCoveredExpr
    (subs->getGroupAttr()->getCharacteristicOutputs(),
     subs->getGroupAttr()->getCharacteristicInputs(),
     subs->selectionPred());

  // In pushdownCoveredExpr, while computing characteristics inputs
  // and outputs of the children of Join, we minimize the set of
  // coverSubExpr such that we do not produce a value and an expression
  // that depends on a value.
  // E.g. a,b,a+b ==> a,b
  // a,a + 1 ==> a
  // Normally this does not causes any problem. But in case of index
  // joins, if an expression is on a clustering key, then both the
  // clustering key and the expression form the characteristics output
  // of the group (see Scan::addIndexinfo for adding clustering key)
  // Now, while minimizing the characteristics outputs, the expression
  // is removed from the outputs of the left child. The right child also does
  // not produce it, because it thinks that since it is covered by the left child
  // it should have been taken care of there. Nested join cannot produce anything
  // by itself. Hence we are left in a situation where no expression is producing
  // the required outputs. So the only resort that we have is to force the
  // right child to produce all those characteristic outputs
  // that an index join should produce, but are not being produced by the left
  // child.If it is not added back, it will cause an assertion in the
  // Generator. Sol: 10-040227-3621

  // all these outputs are needed by the index join
  ValueIdSet reqdOutputsForParent = subs->getGroupAttr()->getCharacteristicOutputs();
  reqdOutputsForParent.subtractSet(leftScan->getGroupAttr()->getCharacteristicOutputs());
  rightScan->getGroupAttr()->addCharacteristicOutputs(reqdOutputsForParent);

  // QSTUFF
  // push stream and delete logical property to left child
  // never push anything to right child. Both scans inhert the old
  // access options which have the UpdateOrDelete flag set to
  // cause exclusive locks to be acquired.
  leftScan->getGroupAttr()->setStream(
       bef->getGroupAttr()->isStream());
  leftScan->getGroupAttr()->setSkipInitialScan(
       bef->getGroupAttr()->isSkipInitialScan());
  leftScan->getGroupAttr()->setEmbeddedIUD(
       bef->getGroupAttr()->getEmbeddedIUD());

  // synthesize logical properties for the new leaf nodes
  subs->setCursorUpdate(TRUE);
  // QSTUFF

  leftScan->synthLogProp();
  rightScan->synthLogProp();

  // we don't call synthLogProp() for the substitute, since its
  // logical properties are already stored in the group attributes,
  // but there are join data members that need to be set. This is
  // done in the synthConstraints() method, which returns early
  // if the logical properties are already set.
  subs->synthConstraints(NULL);

  return subs;
}
// -----------------------------------------------------------------------
// methods for IndexJoinRule2
// -----------------------------------------------------------------------

// Pass-2 match: would allow up to MAX_NUM_INDEX_JOINS stacked index
// joins per scan, but the rule is currently disabled (see below).
NABoolean IndexJoinRule2::topMatch (RelExpr * expr,
                                    Context * context)
{
  // for now, return FALSE for this rule (failures in TEST005)
  // NOTE: everything after this return is intentionally unreachable;
  // it is kept as documentation of the pass-2 matching logic for when
  // the rule is re-enabled.
  return FALSE;

  if (NOT Rule::topMatch(expr, context))
    return FALSE;

  Scan * s = (Scan *) expr;

  // Disable the rule for sampleScan for now.
  if (s->isSampleScan() == TRUE)
    return FALSE;

  // go through all indexes and find out in which way they could
  // be used, if this is not already done
  s->addIndexInfo();

  // in pass 2, consider up to MAX_NUM_INDEX_JOINS index joins
  return (s->getNumIndexJoins() < Scan::MAX_NUM_INDEX_JOINS AND
          s->getIndexInfo().entries() > 0);
}
// Pass-2 entry point: reuse the pass-1 worker, passing the second
// optimization pass number instead.
RelExpr * IndexJoinRule2::nextSubstitute(
     RelExpr * before,
     Context * /*context*/,
     RuleSubstituteMemory * & memory)
{
  return nextSubstituteForPass(before, memory,
                               RuleSet::getSecondPassNumber());
}
// -----------------------------------------------------------------------
// Methods for OrOptimizationRule
// -----------------------------------------------------------------------

// Match a Scan whose entire selection predicate is a single OR
// backbone and that has at least two candidate access paths, so the
// disjuncts can be split across a union of index scans.
NABoolean OrOptimizationRule::topMatch (RelExpr * expr,
                                        Context * context)
{
  if (NOT Rule::topMatch(expr,context))
    return FALSE;

  Scan *scan = (Scan *) expr;
  const ValueIdSet selPreds = expr->getSelectionPredicates();

  // The predicate tree must be a single OR at the top, which shows up
  // as exactly one entry in the selection predicate set; the feature
  // must also be switched on.
  if (selPreds.entries() != 1 OR
      NOT CURRSTMT_OPTDEFAULTS->isOrOptimizationEnabled())
    return FALSE;

  // that single entry must actually be an OR operator
  ValueId topPred;
  selPreds.getFirst(topPred);
  if (topPred.getItemExpr()->getOperatorType() != ITM_OR)
    return FALSE;

  // embedded update/delete operators are excluded
  if (expr->getGroupAttr()->isEmbeddedUpdateOrDelete())
    return FALSE;

  // collect index usability info if not already done
  // (needed for the access-path count below)
  scan->addIndexInfo();

  // require at least two alternate access paths, since the rewrite
  // builds a union of at least two different index scans
  return ((scan->getIndexOnlyIndexes().entries() +
           scan->getPossibleIndexJoins().entries()) >= 2);
}
// helper struct for OrOptimizationRule::nextSubstitute()
// (should really be local to that method, but some C++ compilers
// don't like local structs)
//
// Associates one chosen index with the OR-disjuncts that will be
// evaluated through it, plus the worst "penalty points" score seen
// among those disjuncts (used for index-sharing credit).
struct IndexToDisjuncts
{
  friend class OrOptimizationRule;

  ValueIdSet disjuncts_; // disjuncts associated with this object
  CostScalar maxPoints_; // highest penalty points scored by a disjunct

private:
  // private constructor, object should be created by a friend only
  IndexToDisjuncts() : maxPoints_(0.0)
  {}
};
//10-050310-5477:
//Helper Function for OR-Optimization.
// Returns TRUE when the given predicate value id is either the
// ITM_RETURN_FALSE operator or a boolean constant whose value is 0;
// returns FALSE for everything else.
NABoolean doesValueIdEvaluateToFalse( ValueId predId )
{
  ItemExpr *pred = predId.getItemExpr();
  OperatorTypeEnum opType = pred->getOperatorType();

  if( opType == ITM_RETURN_FALSE )
    return TRUE;

  if( opType != ITM_CONSTANT )
    return FALSE;

  // a constant is "false" only if it is a boolean whose value is 0
  NABoolean negate;
  ConstValue *cv = pred->castToConstValue(negate);
  if( cv == NULL ||
      cv->getType()->getTypeQualifier() != NA_BOOLEAN_TYPE )
    return FALSE;

  return ( *((Int32*)cv->getConstValue()) == 0 );
}
RelExpr * OrOptimizationRule::nextSubstitute(
RelExpr * before,
Context * /*context*/,
RuleSubstituteMemory * & /*memory*/)
{
Scan *s = (Scan *) before;
MapValueIds *result = NULL;
ValueId vid;
ValueIdSet disjuncts;
ValueIdSet disjunctsProcessedSoFar;
ValueIdSet tableColumns = s->getTableDesc()->getColumnList();
const ValueIdList &tableColumnList = s->getTableDesc()->getColumnList();
const ValueIdList &tableColumnVEGList =
s->getTableDesc()->getColumnVEGList();
CollIndex numCols = tableColumns.entries();
ValueIdList charOutputList;
ValueIdList coPartialResult;
RelExpr *partialResult = NULL;
// a sparse array that can be used to look up which index we have selected
// for predicates on a particular column (identified by column number)
ARRAY(CollIndex) indexInfoByColNum(CmpCommon::statementHeap());
// a sparse array that stores the associated disjuncts for each index,
// arranged by index number in the scan node
ARRAY(IndexToDisjuncts *) disjunctsByIndex(CmpCommon::statementHeap());
CollIndex numIndexDescs = s->numUsableIndexes();
IndexToDisjuncts *idinfo = NULL;
charOutputList.insertSet(s->getGroupAttr()->getCharacteristicOutputs());
tableColumns.insertList(tableColumnVEGList);
// ---------------------------------------------------------------------
// Part 1: Determine which indexes to use and how many UNION nodes to
// make
//
// Split the predicate into disjuncts (components of an OR-backbone).
//
// For each disjunct, try to find a column of the table where that
// disjunct could be used as a key predicate. Do this with a simple
// algorithm that finds only obvious cases of key predicates such as
// <col> = expr. Note that this may miss some other possible key
// predicates as well as select some predicates that aren't really
// key predicates. This is not great but acceptable, since we are
// dealing with a conditional transformation in the optimizer. Give
// up if any disjunct does not satisfy this condition, since it
// would cause a full table scan, defeating the purpose of
// OR-optimization.
//
// Now "go shopping for an index" for each unique column obtained in
// step 3. Walk through the list of index-only indexes and index
// join indexes. If the index contains our column, compute the
// approximate number of probes needed.
//
// a) To calculate the number of probes, start with the UEC count of
// all columns that precede our column number in the index. Use 1 if
// the column is the leading column in the index. This approximates
// the number of MDAM probes.
//
// b) To this, add the number of rows returned by the scan node if
// this is an index join. This is a very crude approximation of the
// number of probes in the index join. Do not add any probes if the
// index alone covers all the characteristic outputs of the
// group. In this case we heuristically assume that local predicates
// don't require any base table columns, either. Note that we do the
// index selection only once per column and not once per disjunct,
// this is why we want to avoid looking at the predicate here.
//
// c) If an index has already been selected by a previous disjunct,
// count only the additional probes incurred by this disjunct, if
// any.
//
// d) Select the index with the lowest number of probes (using a
// simple tie breaker if necessary).
//
//
// get all disjunctive parts of the OR predicate (note that topMatch()
// made sure there is only one entry in the selection predicate ValueIdSet)
// ---------------------------------------------------------------------
ValueIdSet selPreds = s->getSelectionPredicates();
if ((CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON ) &&
(selPreds.entries()))
{
ItemExpr * inputItemExprTree = selPreds.rebuildExprTree(ITM_AND,FALSE,FALSE);
ItemExpr * resultOld = revertBackToOldTree(CmpCommon::statementHeap(), inputItemExprTree);
ValueIdSet convpredicates;
resultOld->convertToValueIdSet(convpredicates, NULL, ITM_AND);
doNotReplaceAnItemExpressionForLikePredicates(resultOld,convpredicates,resultOld);
convpredicates.getFirst(vid);
// ValueIdSet resultSet;
// revertBackToOldTreeUsingValueIdSet(selPreds, resultSet);
// ItemExpr * resultOld = resultSet.rebuildExprTree(ITM_AND,FALSE,FALSE);
// doNotReplaceAnItemExpressionForLikePredicates(resultOld,resultSet,resultOld);
// resultSet.getFirst(vid);
}
else
{
s->getSelectionPredicates().getFirst(vid);
}
vid.getItemExpr()->convertToValueIdSet(disjuncts, NULL, ITM_OR, FALSE);
// ---------------------------------------------------------------------
// Go through the disjuncts, find a column for which they can form
// a key predicate (give up if none found), and find an index to use.
// As mentioned above, this code recognizes only simple key predicates.
// ---------------------------------------------------------------------
// 10-050310-5477: If we have disjuncts of the form 1=2 or i = 30 or j = 40
// or k = 50; We used to ignore the valid disjuncts due to the idempotent
// condition 1=2. So the code change will scan all the disjuncts.
// ---------------------------------------------------------------------
ValueIdSet disjunctsEvaluatingToFalse;
for (ValueId d=disjuncts.init(); disjuncts.next(d); disjuncts.advance(d))
{
ItemExpr *ie = d.getItemExpr();
ItemExpr *col = NULL;
switch (ie->getOperatorType())
{
case ITM_VEG_PREDICATE:
{
// we should not really see VEGPredicates in a simple OR, but
// who knows, the monkey on the keyboard will generate this...
VEGPredicate *v = (VEGPredicate *) ie;
VEGReference *r = v->getVEG()->getVEGReference();
if (tableColumns.contains(r->getValueId()))
col = r;
}
break;
case ITM_EQUAL:
case ITM_LESS:
case ITM_LESS_EQ:
case ITM_GREATER:
case ITM_GREATER_EQ:
{
BiRelat *br = (BiRelat *) ie;
ItemExpr *leftOp = br->child(0);
ItemExpr *rightOp = br->child(1);
if (tableColumns.contains(leftOp->getValueId()))
col = leftOp;
else if (tableColumns.contains(rightOp->getValueId()))
col = rightOp;
else
{
//10-050310-5477: We have a case where we have
// both the LHS and RHS are not columns. We can run
// into this case if we have a FALSE or TRUE or ? = ?
// as one of the disjunct.
// 1] For TRUE and ?=? we have do a full table scan anyways,
// so break and Return NULL as the nextSubstitute
// 2] For the FALSE case we need to continue and with the
// other disjuncts. As we may still use OR-optimization.
// But mark this valueID we will remove it from our set
// of disjucts we are processing.
// If we are here we have disjunct of the form ? = ?
// No columns and falls into case 1]
return NULL;
}
}
break;
case ITM_CONSTANT:
// We have systemliteral FALSE or TRUE. case 2]
// If a predicate contains 1=2, it is constant folded and the entire
// OR predicate is TRUE. We never hit this rule;
// If a predicate contains 1=1, it is constant folded and removed
// from the OR predicate.
//
if( doesValueIdEvaluateToFalse(d) )
{
disjunctsEvaluatingToFalse += d;
}
else
{
return NULL;
}
break;
default:
// leave col set to NULL
// Return from here to maintain semantics.
return NULL;
}
// 10-050310-5477
// if we can't associate the disjunct with any column then there is
// no point in doing OR-optimization, because we would have to do a
// full table scan for this particular disjunct anyway. But we need
// to go throught all the disjuncts.
if(NOT col)
continue;
DCMPASSERT(col);
// Calculate the column number that this particular disjunct is
// using in a comparison.
CollIndex colNum = numCols; // initialize with invalid number
ValueId colValId = col->getValueId();
// calculate the column number
for (CollIndex c=0; c < numCols; c++)
if (colValId == tableColumnList[c] OR
colValId == tableColumnVEGList[c])
{
colNum = c;
break;
}
DCMPASSERT(colNum < numCols);
// -----------------------------------------------------------------
// Don't compute the index to use more than once for a given column
// number, since this algorithm is independent of the predicate
// (at least right now)
// -----------------------------------------------------------------
CostScalar bestIxPoints = 0.0;
CostScalar ixPoints;
if (indexInfoByColNum.used(colNum))
{
// re-use the column to index mapping established earlier
idinfo = disjunctsByIndex[indexInfoByColNum[colNum]];
}
else
{
// -------------------------------------------------------------
// Now "go shopping" for indexes. Walk through all indexes
// (both index-only and index joins) and check whether the
// index contains column <colNum>. If it does, compute a
// measure on how good the index is and pick the index that
// is best according to the measure. Keep an array that
// associates a set of disjuncts, the worst measure of these
// disjuncts, and a set of alternative indexes and
// index-joins with each index.
// -------------------------------------------------------------
CollIndex bestIxNum = NULL_COLL_INDEX;
IndexDesc *ixDesc;
Int32 colNumInIndex;
CollIndex ixNum = 0; // artificial numbering scheme
CollIndex numIndexDescs = s->numUsableIndexes();
IndexProperty **indexOnlyInfo = NULL;
ScanIndexInfo **indexJoinInfo = NULL;
// walk over the indexes for this scan node
for (ixNum = 0; ixNum < numIndexDescs; ixNum++)
{
ixDesc = s->getUsableIndex(ixNum, indexOnlyInfo);
// does the index contain column <colNum>?
colNumInIndex = -1;
for (CollIndex ixcolnum=0;
ixcolnum < ixDesc->getIndexKey().entries();
ixcolnum++)
{
IndexColumn *ic = (IndexColumn *)
ixDesc->getIndexKey()[ixcolnum].getItemExpr();
BaseColumn *bc =
(BaseColumn *) ic->getDefinition().getItemExpr();
DCMPASSERT(bc->getOperatorType() == ITM_BASECOLUMN);
if (colNum == (CollIndex) bc->getColNumber())
{
colNumInIndex = ixcolnum;
break;
}
}
// can this disjunct (probably) be used as a key predicate for
// this index?
if (colNumInIndex >= 0)
{
ixPoints = rateIndexForColumn(colNumInIndex,
s,
ixDesc,
(indexOnlyInfo != NULL));
// subtract shared penalty points with other disjuncts
// (see comments above why this is done)
if (disjunctsByIndex.used(ixNum))
{
CostScalar prevPoints =
disjunctsByIndex[ixNum]->maxPoints_ ;
if (prevPoints < ixPoints)
ixPoints -= prevPoints;
else
ixPoints = 0.0;
}
// compare to best index so far, if any
if (bestIxNum == NULL_COLL_INDEX OR ixPoints < bestIxPoints)
{
bestIxPoints = ixPoints;
bestIxNum = ixNum;
}
}
} // end of loop that walks through index descs
// Did we decide on an index to use for this disjunct? Give
// up if we didn't (e. g. there was no index on the column
// in the predicate)
if (bestIxNum == NULL_COLL_INDEX)
return NULL;
// remember the best index for column # <colNum> so we don't
// have to go through this loop again for the same column
indexInfoByColNum.insertAt(colNum, bestIxNum);
// record information about the association between this index and
// the disjunct in the array <disjunctsByIndex>
if (NOT disjunctsByIndex.used(bestIxNum))
{
disjunctsByIndex.insertAt(
bestIxNum,
new (CmpCommon::statementHeap()) IndexToDisjuncts);
idinfo = disjunctsByIndex[bestIxNum];
}
else
idinfo = disjunctsByIndex[bestIxNum];
} // did not compute score for this column number yet
// at this point we have calculated the pointer idinfo that
// points to the best index (heuristically chosen) for this
// disjunct
idinfo->disjuncts_ += d;
if (idinfo->maxPoints_ < bestIxPoints)
idinfo->maxPoints_ = bestIxPoints;
} // end of for loop over disjuncts
// 10-050310-5477
// if we can't associate the disjunct with any column then there is
// no point in doing OR-optimization, because we would have to do a
// full table scan for this particular disjunct anyway. Ideally here
// we have not disjuncts with column names so the expression should
// be folded to the maximum possible extent.
if(disjunctsEvaluatingToFalse == disjuncts)
return NULL;
// 10-050310-5477
// Remove all those idempotent disjuncts.
if(disjunctsEvaluatingToFalse.entries())
disjuncts -= disjunctsEvaluatingToFalse;
// 10-050310-5477
// if we can't associate the disjunct with any column then there is
// no point in doing OR-optimization, because we would have to do a
// full table scan for this particular disjunct anyway. Ideally here
// we have not disjuncts with column names so the expression should
// be folded to the maximum possible extent.
if(disjunctsEvaluatingToFalse == disjuncts)
return NULL;
// 10-050310-5477
// Remove all those idempotent disjuncts.
if(disjunctsEvaluatingToFalse.entries())
disjuncts -= disjunctsEvaluatingToFalse;
// if we haven't found more than one usable index then all this work
// was for nothing, since we can't produce two different scan nodes
if (disjunctsByIndex.entries() < 2)
return NULL;
// ---------------------------------------------------------------------
// Part 2: Now create the actual substitute
// ---------------------------------------------------------------------
// now walk over the stored indexes and disjuncts and create a new scan
// node for each entry in the disjunctsByIndex array
for (CollIndex currIndexNum=0; currIndexNum < numIndexDescs; currIndexNum++)
{
if (disjunctsByIndex.used(currIndexNum))
{
IndexToDisjuncts &idinfo = *(disjunctsByIndex[currIndexNum]);
// create a new scan node or a scan+union node
partialResult = makeSubstituteScan(
s,
idinfo.disjuncts_,
partialResult,
disjunctsProcessedSoFar,
charOutputList,
coPartialResult);
// remember the disjuncts used so far, to add their negation
// to future scan nodes
disjunctsProcessedSoFar += idinfo.disjuncts_;
} // entry of disjunctsByIndex is used
} // end of walk over disjunctsByIndex array
DCMPASSERT(disjunctsProcessedSoFar == disjuncts);
// ---------------------------------------------------------------------
// Create a MapValueIds node that maps the ValueIdUnion nodes to the
// original characteristics outputs, this is the result node of this rule.
// ---------------------------------------------------------------------
result = new (CmpCommon::statementHeap())
MapValueIds(partialResult, ValueIdMap(charOutputList, coPartialResult));
result->setGroupAttr(s->getGroupAttr());
// To be able to replace VEGies later for the upper values, remember
// that this map value ids really represents a scan node and that we should
// choose one of the columns of the scan node from any VEGies that need
// to be resolved here and have such a column as a VEG member.
result->addValuesForVEGRewrite(tableColumnList);
// now do the standard thing one should do with a substitute
result->allocateAndPrimeGroupAttributes();
result->pushdownCoveredExpr
(result->getGroupAttr()->getCharacteristicOutputs(),
result->getGroupAttr()->getCharacteristicInputs(),
result->selectionPred());
// synthesize logical properties for the new nodes below the result
result->child(0)->synthLogProp();
// We don't want the union nodes to have a "NumBaseTables" attribute
// that is greater than one, because the group of the result will
// usually have its NumBaseTables attribute set to 1. Go and adjust
// this property in the newly generated union nodes.
Union *u = (Union *) result->child(0).getPtr();
while (u->getOperatorType() == REL_UNION)
{
u->getGroupAttr()->resetNumBaseTables(1);
u = (Union *) u->child(0).getPtr();
}
return result;
}
CostScalar OrOptimizationRule::rateIndexForColumn(
     Int32 colNumInIndex,
     Scan *s,
     IndexDesc *ixDesc,
     NABoolean indexOnly)
{
  // Heuristically rate index <ixDesc> for a disjunct whose column sits
  // at position <colNumInIndex> of the index key, by accumulating
  // penalty points:
  // - one point per "MDAM skip", approximated by the number of key
  //   columns preceding <colNumInIndex> (a stand-in for their combined
  //   UEC — see the $$$$ note below)
  // - one point per probe into the base table of an index join,
  //   approximated by the estimated row count
  // Points already charged to a previous disjunct are not re-counted,
  // since the current and previous disjunct share a common scan node.
  CostScalar penalty(1.0);

  // penalty for a non-leading index column
  if (colNumInIndex > 0)
  {
    // $$$$ replace with real UEC later
    penalty += colNumInIndex;
  }

  if (indexOnly)
    return penalty; // no index join possible, nothing more to charge

  // Charge for doing an index join, BUT only if the newly created scan
  // node for this disjunct really needs one. It could be that the base
  // table was needed only to evaluate predicates; the coverage test
  // below tries to recognize that case. We make the optimistic
  // assumption that our local predicate does NOT require values from
  // the base table, so this calculation can be done once per column
  // number instead of once per disjunct. If the assumption is wrong we
  // still pick a usable index — it is just more expensive than it
  // would otherwise be.
  GroupAttributes indexOnlyGA;
  ValueIdSet dummyReferencedInputs;
  ValueIdSet dummyCoveredSubExpr;
  ValueIdSet dummyUnCoveredExpr;

  indexOnlyGA.addCharacteristicInputs(
       s->getGroupAttr()->getCharacteristicInputs());
  indexOnlyGA.addCharacteristicOutputs(
       ixDesc->getIndexColumns());

  NABoolean outputsCoveredByIndex =
    s->getGroupAttr()->getCharacteristicOutputs().isCovered(
         s->getGroupAttr()->getCharacteristicInputs(),
         indexOnlyGA,
         dummyReferencedInputs,
         dummyCoveredSubExpr,
         dummyUnCoveredExpr);

  if (outputsCoveredByIndex)
  {
    // Hope that we won't have to do an index join; add half a penalty
    // point for the risk that we may be wrong. This makes sure that an
    // otherwise equal index-only index will win.
    penalty += 0.5;
  }
  else
  {
    // Expect to do an index join; assume we have to probe into the
    // base table once for every row that all of the predicates produce.
    penalty += s->getGroupAttr()->getResultCardinalityForEmptyInput();
  }

  return penalty;
}
// ---------------------------------------------------------------------
// OrOptimizationRule::makeSubstituteScan()
//
// Build one piece of the OR-optimization substitute: a brand-new Scan
// node on the same table as <s> that evaluates only <disjuncts>, and —
// if a partial result from earlier disjuncts exists — a Union node
// that glues the new scan onto that partial result.
//
// Parameters:
//   s                       - the original Scan node being transformed
//   disjuncts               - the disjunct(s) destined for the new scan
//   partialResult           - substitute built for previously processed
//                             disjuncts; NULL on the first call
//   disjunctsProcessedSoFar - disjuncts handled by earlier calls; their
//                             negation is ANDed to the new scan so rows
//                             satisfying two disjuncts are not returned
//                             twice (NOTE(review): passed by value — a
//                             const reference would avoid a copy; confirm)
//   origCharOutputList      - characteristic outputs of the original scan
//   resultCharOutputs       - in/out: output value ids of the partial
//                             result, rewritten for the subtree built here
//
// Returns the new partial result: the scan itself on the first call,
// Union(partialResult, newScan) on subsequent calls.
// ---------------------------------------------------------------------
RelExpr * OrOptimizationRule::makeSubstituteScan(
     Scan *s,
     const ValueIdSet &disjuncts,
     RelExpr *partialResult,
     const ValueIdSet disjunctsProcessedSoFar,
     const ValueIdList &origCharOutputList,
     ValueIdList &resultCharOutputs)
{
  RelExpr *result = NULL;
  // local binder/normalizer workspaces used to bind and normalize the
  // newly created scan in isolation
  BindWA bindWA(ActiveSchemaDB(), CmpCommon::context());
  NormWA normWA(CmpCommon::context());
  CorrName cn(s->getTableName().getQualifiedNameObj(),
              CmpCommon::statementHeap());

  // -------------------------------------------------------------
  // Make a new scan node
  // -------------------------------------------------------------
  // A fresh TableDesc gives the new scan its own set of value ids,
  // distinct from those of the original scan.
  Scan *newScan = new (CmpCommon::statementHeap()) Scan(
       cn,
       bindWA.createTableDesc(s->getTableDesc()->getNATable(),
                              cn,
                              FALSE));

  newScan->setOptStoi(s->getOptStoi());
  newScan->accessOptions() = s->accessOptions();

  // QSTUFF
  // propagate stream properties to new scan node
  newScan->getGroupAttr()->setStream(s->getGroupAttr()->isStream());
  newScan->getGroupAttr()->setSkipInitialScan(
       s->getGroupAttr()->isSkipInitialScan());
  // QSTUFF

  // don't apply the OR-optimization rule on the results again, it
  // shouldn't find any good conditions for further OR-optimization
  newScan->contextInsensRules() += getNumber();

  // Go through normalization of the new scan node, VEGies will be
  // created for each column during this step. Unlike for joins,
  // creating new UNION nodes requires new VEG regions: The
  // values in the different unions aren't the same and shouldn't
  // be members of a VEG.
  ExprGroupId dummyPtr = newScan;
  normWA.allocateAndSetVEGRegion(IMPORT_ONLY,newScan,0);
  newScan->transformNode(normWA, dummyPtr);
  newScan->rewriteNode(normWA);
  newScan->normalizeNode(normWA);

  // Build a map from the original scan's value ids (columns and VEGies)
  // to the corresponding value ids of the new scan.
  const ValueIdList &vegCols =
    s->getTableDesc()->getColumnVEGList();
  const ValueIdList &newVegCols =
    newScan->getTableDesc()->getColumnVEGList();
  ValueIdMap newUnionMap(s->getTableDesc()->getColumnList(),
                         newScan->getTableDesc()->getColumnList());
  for (CollIndex v=0; v < vegCols.entries(); v++)
    newUnionMap.addMapEntry(vegCols[v],newVegCols[v]);

  // assign the disjuncts to the new scan nodes
  ValueIdSet vs;
  ItemExpr *newDisjuncts = NULL;

  // take the disjunct(s) destined for the new scan node and
  // rewrite them in terms of the value ids of the new scan
  // node, then add them to the new scan's selection
  // predicates
  newUnionMap.rewriteValueIdSetDown(disjuncts, vs);
  newDisjuncts = vs.rebuildExprTree(ITM_OR);
  newDisjuncts->synthTypeAndValueId();
  newScan->selectionPred() += newDisjuncts->getValueId();

  // compute the characteristic outputs of the new scan node,
  // otherwise the standard method of calling
  // allocateAndPrimeGroupAttributes and pushdownCoveredExpr
  // below will use too many outputs
  vs.clear();
  newUnionMap.rewriteValueIdSetDown(
       s->getGroupAttr()->getCharacteristicOutputs(), vs);
  newScan->setPotentialOutputValues(vs);

  if (partialResult)
  {
    // We'll now take the disjuncts that were processed so
    // far and AND their negation to the selection predicate
    // of the new scan node. Without this, we would return
    // those rows twice that satisfy both the previous and
    // the current disjuncts. Note that "negation" in this
    // case means "NOT ((<pred>) IS TRUE)", we only want to
    // exclude those rows where the left part of the OR
    // evaluated to TRUE.
    ItemExpr * negatedExprForNewScan;
    Union * unionNode;

    vs.clear();
    newUnionMap.rewriteValueIdSetDown(disjunctsProcessedSoFar, vs);
    negatedExprForNewScan = vs.rebuildExprTree(ITM_OR);
    negatedExprForNewScan =
      new (CmpCommon::statementHeap()) UnLogic(
           ITM_NOT,
           new (CmpCommon::statementHeap()) UnLogic(
                ITM_IS_TRUE,
                negatedExprForNewScan));
    negatedExprForNewScan->synthTypeAndValueId();
    newScan->selectionPred() += negatedExprForNewScan->getValueId();

    // ---------------------------------------------------------
    // create the UNION node that connects the two new scans
    // ---------------------------------------------------------
    result =
      unionNode =
      new(CmpCommon::statementHeap()) Union(partialResult, newScan);

    ValueIdList newPartialResult;

    // from Union::bindNode(): Make the map of value ids for
    // the union
    for (CollIndex i = 0; i < origCharOutputList.entries(); i++)
    {
      ValueId newValId;

      // translate the value id of the characteristic
      // outputs into the equivalent value id of the
      // children
      newUnionMap.rewriteValueIdDown(origCharOutputList[i],newValId);
      ValueIdUnion *vidUnion = new (CmpCommon::statementHeap())
        ValueIdUnion(resultCharOutputs[i],
                     newValId,
                     NULL_VALUE_ID,
                     unionNode->getUnionFlags());
      vidUnion->synthTypeAndValueId();
      unionNode->addValueIdUnion(vidUnion->getValueId(),
                                 CmpCommon::statementHeap());
      newPartialResult.insert(vidUnion->getValueId());
    }

    // store the value ids of the newly created partial result
    resultCharOutputs = newPartialResult;
  }
  else
  {
    // pass the newly created scan node and the union map
    // (via prevUnionMap) on to the next iteration
    result = newScan;
    newUnionMap.rewriteValueIdListDown(origCharOutputList,
                                       resultCharOutputs);
  }
  return result;
}
// -----------------------------------------------------------------------
// methods for class RoutineJoinToTSJRule
// -----------------------------------------------------------------------
// Destructor: RoutineJoinToTSJRule adds no state of its own, so there is
// nothing to release beyond what the base class destructor does.
RoutineJoinToTSJRule::~RoutineJoinToTSJRule() {}
NABoolean RoutineJoinToTSJRule::topMatch(RelExpr *relExpr,
                                         Context *context)
{
  // This rule only applies to RoutineJoin operators; bail out fast for
  // anything else.
  if (relExpr->getOperatorType() != REL_ROUTINE_JOIN)
    return FALSE;

  // generic applicability checks of the base class
  if (NOT Rule::topMatch(relExpr, context))
    return FALSE;

  Join *joinExpr = (Join *) relExpr;

  // A nested join is not good for filtering joins in the star join
  // schema. This check is kept for when this rule gets merged with the
  // JoinToTSJ rule.
  NABoolean isStarFilterJoin =
    (CmpCommon::getDefault(COMP_BOOL_85) == DF_OFF &&
     joinExpr->getSource() == Join::STAR_FILTER_JOIN);

  if (isStarFilterJoin && NOT joinExpr->isRoutineJoin())
    return FALSE;

  return TRUE;
} // RoutineJoinToTSJRule::topMatch
// ---------------------------------------------------------------------
// RoutineJoinToTSJRule::nextSubstitute()
//
// Produce the substitute for a RoutineJoin: a TSJ with the same group
// attributes and join fields, with the selection predicate pushed down
// to the right child (the left-side pushdown already happened in
// Join::pushdownCoveredExpr). An empty Filter node left over on the
// right child is eliminated.
//
// Parameters:
//   before  - the RoutineJoin being transformed
//   context - unused here (pattern substitute needs no context)
//   memory  - rule substitute memory managed by Rule::nextSubstitute
//
// Returns the new TSJ node (marked as derived from a RoutineJoin).
// ---------------------------------------------------------------------
RelExpr *RoutineJoinToTSJRule::nextSubstitute(
     RelExpr *before,
     Context *context,
     RuleSubstituteMemory * & memory)
{
  Join *bef = (Join * )before;

  CMPASSERT(bef->isRoutineJoin());

  // Want to make sure we don't have a join predicate..
  CMPASSERT(bef->joinPred().isEmpty());

  // Build the result tree,
  // Make sure we call the copyTopNode for the Join.
  Join *result = (Join *)Rule::nextSubstitute(before,NULL,memory);
  result->setOperatorType(REL_TSJ);

  // now set the group attributes of the result's top node
  result->setGroupAttr(before->getGroupAttr());

  // transfer all the join fields to the new TSJ
  (void) bef->Join::copyTopNode(result,CmpCommon::statementHeap());

  // Recompute the input values that each child requires as well as
  // the output values that each child is capable of producing
  result->allocateAndPrimeGroupAttributes();

  // The right child may additionally use the left child's outputs as
  // inputs (that is what makes this a TSJ).
  ValueIdSet availableOutputsFromLeftChild =
    before->child(0).getGroupAttr()->getCharacteristicOutputs();
  ValueIdSet availableInputs = availableOutputsFromLeftChild;
  availableInputs += result->getGroupAttr()->getCharacteristicInputs();
  ValueIdSet exprOnParent;

  // Push the predicate to the right child. We already have pushed
  // it to the left in Join::pushdownCoveredExpr()
  result->RelExpr::pushdownCoveredExpr(
       result->getGroupAttr()->getCharacteristicOutputs(),
       availableInputs,
       result->selectionPred(), &exprOnParent, 1);

  // We better have pushed everything..
  CMPASSERT(result->selectionPred().isEmpty());

  RelExpr * filter = result->child(1);

  // Eliminate Filter node if no predicates were added to it
  if (filter->selectionPred().isEmpty())
  {
    result->child(1) = filter->child(0);
    filter->deleteInstance();
    filter = NULL;
  }

  // synthesize logical properties for my new right child
  if (filter)
  {
    filter->synthLogProp();
    filter->getGroupAttr()->addToAvailableBtreeIndexes(
         filter->child(0).getGroupAttr()->getAvailableBtreeIndexes());
    // propagate the inlining restriction to the new filter node
    if (before->getInliningInfo().isMaterializeNodeForbidden())
      filter->getInliningInfo().setFlags(II_MaterializeNodeForbidden);
  }

  result->setDerivedFromRoutineJoin();
  return result;
} // RoutineJoinToTSJRule::nextSubstitute()
Int32 RoutineJoinToTSJRule::promiseForOptimization(RelExpr * ,
                                                   Guidance * ,
                                                   Context * )
{
  // Transforming logical joins to logical TSJs enables the physical
  // NestedJoin rule to fire, so give this rule a promise that lets it
  // fire before the other join transformation rules.
  const Int32 fireEarlyBias = 1000; /* was: - 1000 (+ 1000 disabled) */
  return (Int32) (DefaultTransRulePromise - fireEarlyBias);
}
NABoolean RoutineJoinToTSJRule::canBePruned(RelExpr * expr) const
{
  // Never prune RJ->TSJ for a RoutineJoin: in many cases the TSJ is
  // its only possible implementation.
  if (expr->getOperator().match(REL_ROUTINE_JOIN))
    return FALSE;
  return TRUE;
}
// -----------------------------------------------------------------------
// methods for JoinToTSJRule
// -----------------------------------------------------------------------
// ---------------------------------------------------------------------
// JoinToTSJRule::topMatch()
//
// Decide whether a logical join is a candidate for transformation into
// a TSJ (the logical precursor of a nested join). A series of
// heuristics weeds out unpromising nested joins (keyless NJs, large
// inner subtrees, NJs clearly beaten by a hash join), while always
// admitting TSJs that are mandatory (streams, embedded update/delete)
// or the only implementation option.
//
// Fixes relative to the previous version:
// 1) dangling-else: the base-table-count fallback check was bound to
//    the wrong "if", so it ran when group analysis WAS available and
//    was skipped when grpA1 was NULL (braces added below);
// 2) grpA1 was dereferenced without a NULL check in the HJ-vs-NJ
//    heuristic, a latent NULL dereference when no group analysis
//    exists (guard added below).
// ---------------------------------------------------------------------
NABoolean JoinToTSJRule::topMatch (RelExpr * expr,
                                   Context * context)
{
  if (NOT Rule::topMatch(expr, context))
    return FALSE;

  Join *joinExpr = (Join *) expr;

  if (joinExpr->isRoutineJoin())
    return FALSE; // don't want to do this for a routine join

  /*
    The block below was added when JoinToTSJRule was a pass 2 rule by
    default. This has changed and it is a pass 1 rule now, so this code
    should be removed. It is guarded with comp_bool_71, however, and was
    not removed in case we want to make it a pass 2 rule again; then we
    will have to enable this code.
  */
  // The JoinToTSJRule is enabled in Pass1 ONLY if there is at least
  // one embedded update or delete or a stream in the query. For pass1
  // fire the rule only for embedded updates and deletes and for streams.
  if ((CmpCommon::getDefault(COMP_BOOL_71) == DF_ON) AND
      (GlobalRuleSet->getCurrentPassNumber() ==
       GlobalRuleSet->getFirstPassNumber()) AND
      NOT (joinExpr->child(0).getGroupAttr()->isEmbeddedUpdateOrDelete() OR
           joinExpr->child(0).getGroupAttr()->isStream())
     )
    return FALSE;

  // QSTUFF
  // streams and embedded update/delete are not allowed on the right side
  CMPASSERT (NOT ((Join *)expr)->child(1).getGroupAttr()->isStream());
  CMPASSERT (NOT ((Join *)expr)->child(1).getGroupAttr()->isEmbeddedUpdateOrDelete());
  // QSTUFF

  // no TSJ when the right child contains a table-mapping UDF
  RelExpr * rightChild = joinExpr->child(1);
  if (rightChild && rightChild->getGroupAttr()->getNumTMUDFs() > 0)
    return FALSE;

  // nested join excluded by heuristics
  //if (NOT joinExpr->canTransformToTSJ()) return FALSE;

  // QSTUFF
  // streams, embedded deletes and embedded updates
  // must use tsj's and can't use merge or hash joins.
  if (joinExpr->getGroupAttr()->isStream() ||
      joinExpr->getGroupAttr()->isEmbeddedUpdateOrDelete() )
    return TRUE;
  // QSTUFF

  // avoid keyless NJs: nested joins that require full table scans.
  // skip keyless NJ if all of the following are true:
  // 1) no cqs
  // 2) nested join is not the only explored implementation option
  // 3) cqd to skip keyless NJ is ON
  // 4) we have query analysis

  // if there is a required shape, skip this keylessNJ heuristic.
  NABoolean ignoreCQS =
    ((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_71) == 1);

  NABoolean cqs_skips_keylessNJ_heuristic =
    (ignoreCQS ? FALSE :
     (context && context->getReqdPhysicalProperty()->getMustMatch() != NULL));

  NABoolean canUseMJ = CURRSTMT_OPTDEFAULTS->isMergeJoinConsidered() &&
    !(joinExpr->getEquiJoinPredicates().isEmpty());

  // if NJ is the only (efficient) implementation option, skip this heuristic.
  NABoolean NJisOnlyOption =
    // NJ is the only implementation for pushdown compound stmt
    ((expr->isinBlockStmt() && CURRSTMT_OPTDEFAULTS->pushDownDP2Requested())
     OR
     // NJ into fact table is the only explored implementation option
     (joinExpr->getSource() == Join::STAR_FACT)
     OR
     // NJ is the only join implementation option
     (!joinExpr->allowHashJoin()
      OR (!CURRSTMT_OPTDEFAULTS->isHashJoinConsidered() &&
          !canUseMJ)));

  // cqd to skip keyless NJ
  NABoolean keylessNJ_off =
    CmpCommon::getDefault(KEYLESS_NESTED_JOINS) == DF_OFF;

  // cqd to allow only full inner key NJ
  NABoolean fullInnerKey =
    CmpCommon::getDefault(NESTED_JOINS_FULL_INNER_KEY) == DF_ON;

  // do we have analysis?
  NABoolean hasAnalysis = QueryAnalysis::Instance()->isAnalysisON();

  Lng32 mtd_mdam_uec_threshold = (Lng32)(ActiveSchemaDB()->getDefaults()).
    getAsLong(MTD_MDAM_NJ_UEC_THRESHOLD);

  // if no cqs and NJ is not the only explored option and keylessNJ off
  // then avoid keyless nested join
  if (!cqs_skips_keylessNJ_heuristic AND !NJisOnlyOption AND
      keylessNJ_off AND hasAnalysis)
  {
    // if there is at least 1 index
    const SET(IndexDesc *) &availIndexes=
      joinExpr->child(1).getGroupAttr()->getAvailableBtreeIndexes();
    if (availIndexes.entries() > 0)
    {
      // get all predicates
      ValueIdSet allJoinPreds;
      allJoinPreds += joinExpr->getSelectionPred();
      allJoinPreds += joinExpr->getJoinPred();

      // get all predicates' base columns
      ValueIdSet allReferencedBaseCols;
      allJoinPreds.findAllReferencedBaseCols(allReferencedBaseCols);

      NABoolean handleDivColumnsInMTD =
        (CmpCommon::getDefault(MTD_GENERATE_CC_PREDS) == DF_ON);
      if ( mtd_mdam_uec_threshold < 0 )
        handleDivColumnsInMTD = FALSE;
      NABoolean collectLeadingDivColumns = handleDivColumnsInMTD;

      NABoolean nj_check_leading_key_skew =
        (CmpCommon::getDefault(NESTED_JOINS_CHECK_LEADING_KEY_SKEW) == DF_ON);
      Lng32 nj_leading_key_skew_threshold =
        (Lng32)(ActiveSchemaDB()->getDefaults()).
        getAsLong(NESTED_JOINS_LEADING_KEY_SKEW_THRESHOLD);

      // try to find a predicate that matches
      // 1st nonconstant key prefix column
      NABoolean foundPrefixKey = FALSE;
      CollIndex x, i = 0;
      for (i = 0; i < availIndexes.entries() && !foundPrefixKey; i++)
      {
        IndexDesc *currentIndexDesc = availIndexes[i];
        const ValueIdList *currentIndexSortKey =
          (&(currentIndexDesc->getOrderOfKeyValues()));
        NABoolean missedSuffixKey = FALSE;
        ValueIdSet divColumnsWithoutKeyPreds;
        ValueIdSet leadingKeys;

        // get this index's 1st nonconstant key prefix column
        for (x = 0;
             x < (*currentIndexSortKey).entries() &&
               (!foundPrefixKey || fullInnerKey);
             x++)
        {
          ValueId firstkey = (*currentIndexSortKey)[x];

          // firstkey with a constant predicate does not count in
          // making this NJ better than a HJ. keep going.
          ItemExpr *cv;
          NABoolean isaConstant = FALSE;
          ValueId firstkeyCol;
          ColAnalysis *colA = firstkey.baseColAnalysis
            (&isaConstant, firstkeyCol);
          leadingKeys.insert(firstkeyCol);
          if(colA)
          {
            if (colA->getConstValue(cv,FALSE/*useRefAConstExpr*/))
              continue; // try next prefix column
          }
          else
          {
            // no column analysis: look for a constant predicate on the
            // key column among the local predicates of the base table
            ValueIdSet predsWithConst;
            ValueIdSet localPreds = currentIndexDesc->getPrimaryTableDesc()->getLocalPreds();
            localPreds.getConstantExprs(predsWithConst);
            firstkeyCol = firstkey.getBaseColumn(&isaConstant);
            ValueId exprId;
            if(predsWithConst.referencesTheGivenValue(firstkeyCol,exprId))
              continue;
          }

          // skip salted columns and constant predicates
          if (isaConstant || firstkeyCol.isSaltColumn() )
            continue; // try next prefix column

          // If firstkeyCol is one of the leading DIVISION columns
          // without any predicate attached on it, collect it in
          // a ValueIdSet. Later on, we will check the UEC for the
          // set. If the UEC is less than a threshold, we will allow
          // such "keyless" NJ.
          NABoolean isLeadingDivColumn =
            firstkeyCol.isDivisioningColumn();

          // any predicate on first nonconstant prefix key column?
          if (allReferencedBaseCols.containsTheGivenValue(firstkeyCol))
          {
            if ( collectLeadingDivColumns )
              // We will stop collecting any additional leading DIV
              // key columns from this point on.
              collectLeadingDivColumns = FALSE;

            // nonconstant prefix key matches predicate
            foundPrefixKey = TRUE; // allow this NJ to compete
          }
          else // no predicate match for prefix key column
          {
            if ( collectLeadingDivColumns && isLeadingDivColumn )
              divColumnsWithoutKeyPreds.insert(firstkeyCol);
            else
            {
              missedSuffixKey = TRUE;
              leadingKeys.remove(firstkeyCol);
              break; // try next index
            }
          }
        }

        // NOTE: parentheses added for readability only; && already
        // bound tighter than ||, so the behavior is unchanged.
        if ( (handleDivColumnsInMTD && foundPrefixKey) ||
             (missedSuffixKey && nj_check_leading_key_skew) )
        {
          // If the SC/MC UEC of all leading div columns absent of
          // key predicates is greater than a threshold specified via
          // CQD MTD_MDAM_NJ_UEC_THRESHOLD, disable "keyless" NJ.
          // First figure out the SC/MC UEC on the leading DIV columns
          // lacking any key predicates, or RC/uec when the trailing
          // key columns are lacking any key predicates.
          EstLogPropSharedPtr inputLPForChild;
          inputLPForChild = joinExpr->child(0).outputLogProp
            ((*GLOBAL_EMPTY_INPUT_LOGPROP));

          EstLogPropSharedPtr outputLogPropPtr =
            joinExpr->child(1).getGroupAttr()->
            outputLogProp(inputLPForChild);

          const ColStatDescList & stats = outputLogPropPtr->getColStats();
          const MultiColumnUecList * uecList = stats.getUecList();

          if ( uecList ) {
            CostScalar uec;

            if ( missedSuffixKey && nj_check_leading_key_skew ) {
              // max rows from the inner
              CostScalar child1card = joinExpr->child(1).getGroupAttr()
                ->getResultCardinalityForEmptyInput();

              uec = uecList->lookup(leadingKeys);
              // low rows-per-uec means little skew: keep the NJ alive
              if ( uec.isGreaterThanZero() &&
                   child1card/uec <=
                   CostScalar(nj_leading_key_skew_threshold) )
                missedSuffixKey = FALSE;
            } else {
              uec = uecList->lookup(divColumnsWithoutKeyPreds);
              if ( uec.isGreaterThanZero() &&
                   uec > CostScalar(mtd_mdam_uec_threshold) )
                foundPrefixKey = FALSE;
            }
          }
        }

        // skip partial keyless NJ if all key cols are not covered
        if (foundPrefixKey && missedSuffixKey)
          foundPrefixKey = FALSE;
      }
      // all indexes have been tried
      if (!foundPrefixKey)
        return FALSE; // it's a keyless NJ. avoid it.
    }
    else // no index
    {
      if ( CmpCommon::getDefault(NESTED_JOINS_KEYLESS_INNERJOINS) == DF_OFF )
        return FALSE; // it's a keyless NJ. avoid it.
      else {
        // When the CQD is ON, we rely on the code above for the contained NJs
        // in the inner, the max cardinality and risk premium on NJs to protect
        // against bad contained NJs.
        // The code below makes sure we are dealing with joins in the inner.
        if ( joinExpr->child(1).getGroupAttr()-> getNumBaseTables() == 1 )
          return FALSE;
      }
    }
  }

  //------------
  // don't use a nested join if there are more than COMP_INT_10 tables
  // in the inner side (i.e. the right side)
  GroupAnalysis *grpA1 = ((Join *)expr)->child(1).getGroupAttr()->getGroupAnalysis();
  if (grpA1)
  {
    // BUGFIX (dangling else): braces added. Previously the "else"
    // below bound to the inner "if", so the base-table fallback check
    // ran when group analysis WAS available and never ran when it
    // wasn't.
    if (grpA1->getAllSubtreeTables().entries() >
        (ULng32)(ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_10))
      return FALSE;
  }
  else
  {
    // no group analysis: fall back to the base table count
    if ((((Join *)expr)->child(1).getGroupAttr()->getNumBaseTables() >
         (ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_10)) &&
        (CmpCommon::getDefault(COMP_BOOL_112) == DF_OFF))
      return FALSE;
  }

  if (CURRSTMT_OPTDEFAULTS->optimizerHeuristic4() && context
      && context->getInputLogProp()->getResultCardinality() <= 1
      && joinExpr->getSource() != Join::STAR_FACT) // fact table join in star schema
    // is exempt from NJ heuristic pruning
  {
    CostScalar cardinality0 =
      expr->child(0).getGroupAttr()->getResultCardinalityForEmptyInput();
    CostScalar maxCardinality0 =
      expr->child(0).getGroupAttr()->getResultMaxCardinalityForEmptyInput();

    // start avoiding nested join when NJprobeTimeInSec > HJscanTimeInSec
    // NJprobeTimeInSec = outerRows * 20 * 10 ** -6
    // HJscanTimeInSec = (innerSize / (1024 * 1024)) / 100
    // so, avoid nested join when
    // outerRows * 20 * (10 ** -6) > (innerSize / (1024 * 1024)) / 100
    // outerRows * 20 * (10 ** -6) * 100 * 1024 * 1024 > innerSize
    // outerRows * 2097 > innerSize
    // 2097 or 2000 is a very rough ratio that can change over time
    // so we take it as a CQD setting
    Lng32 ratio = (ActiveSchemaDB()->getDefaults()).getAsLong
      (HJ_SCAN_TO_NJ_PROBE_SPEED_RATIO);

    if ( CURRSTMT_OPTDEFAULTS->isHashJoinConsidered() AND ratio > 0 AND
         !(joinExpr->getGroupAttr()->isSeabaseMDTable()) )
    {
      // hash join is clearly faster if reading inner table once (after
      // applying local predicates) is faster than reading inner table P
      // times (after applying local predicates + join predicate on
      // inner table's clustering key columns) where P = #outerRows.
      //
      // BUGFIX: guard against grpA1 == NULL. The group may have no
      // analysis; in that case this HJ-vs-NJ heuristic is skipped
      // (nodeA1 stays NULL), instead of dereferencing a NULL pointer.
      NodeAnalysis *nodeA1 = grpA1 ? grpA1->getNodeAnalysis() : NULL;
      TableAnalysis *tabA1 = NULL;
      if (nodeA1 != NULL && (tabA1=nodeA1->getTableAnalysis()) != NULL
          // We must allow NJ if #outerRows <= 1 because
          // HashJoinRule::topMatch disables HJ for that case.
          // Otherwise, we can get internal error 2235 "pass one
          // skipped, but cannot produce a plan in pass two".
          && cardinality0 > 1) // allow NJ if #outerRows <= 1
      {
        // innerS is size of data scanned for inner table in HJ plan
        CostScalar innerS =
          tabA1->getCardinalityAfterLocalPredsOnCKPrefix() *
          tabA1->getRecordSizeOfBaseTable();

        // if innerS < #outerRows * ratio, prefer HJ over NJ.
        if (innerS < cardinality0 * ratio)
        {
          return FALSE;
        }

        // innerS and #outerRows are estimates that can be wrong.
        // We want to avoid catastrophic nested joins without
        // missing too many performance-enhancing nested joins.
        // A catastrophic nested join is one whose actual #outerRows
        // is much greater than cardinality0. A performance-enhancing
        // nested join is one whose actual #outerRows is much less than
        // cardinality0. Don't consider NJ if
        // innerS * fudgeFactor < maxCard(outer) * ratio
        double fudge = CURRSTMT_OPTDEFAULTS->robustHjToNjFudgeFactor();
        if (fudge > 0 &&
            innerS * fudge < maxCardinality0 * ratio)
        {
          return FALSE;
        }
      }
    }

    // This is temporary to fail nested join if both children
    // do not have statistics.
    //if ( (CmpCommon::getDefault(COMP_BOOL_67) == DF_ON) AND
    //     (cardinality0 == 100 ) AND (cardinality1 == 100) )
    //  return FALSE;
  }
  //------------

  // if Hash Joins are disabled and we do not have equi-join
  // predicates merge joins will not be selected, so let's
  // enable nested joins
  if (NOT CURRSTMT_OPTDEFAULTS->isHashJoinConsidered() AND
      joinExpr->getEquiJoinPredicates().isEmpty())
    return TRUE;
  else
    // If nested joins are not allowed then say no.
    if (NOT CURRSTMT_OPTDEFAULTS->isNestedJoinConsidered())
      return FALSE;

  // nested Join is only used for cross products if the default
  // NESTED_JOIN_FOR_CROSS_PRODUCTS is ON
  if (joinExpr->isCrossProduct() AND
      NOT CURRSTMT_OPTDEFAULTS->nestedJoinForCrossProducts())
    return FALSE;

  // nested join is not good for filtering joins in the star join schema
  if (CmpCommon::getDefault(COMP_BOOL_85) == DF_OFF &&
      (joinExpr->getSource() == Join::STAR_FILTER_JOIN) &&
      !(joinExpr->isRoutineJoin()))
    return FALSE;

  return TRUE;
}
// ---------------------------------------------------------------------
// JoinToTSJRule::nextSubstitute()
//
// Produce the substitute for a logical join: a TSJ of the appropriate
// flavor (per Join::getTSJJoinOpType) with all join/selection
// predicates pushed down. Left-over empty Filter nodes on the right
// child are eliminated, and pure cross products are abandoned (NULL is
// returned) when hash joins could handle them instead.
//
// Parameters:
//   before  - the join being transformed
//   context - unused
//   memory  - rule substitute memory managed by Rule::nextSubstitute
//
// Returns the new TSJ node, or NULL if the TSJ would be a cross
// product better left to a hash join.
// ---------------------------------------------------------------------
RelExpr * JoinToTSJRule::nextSubstitute (RelExpr * before,
                                         Context * /*context*/,
                                         RuleSubstituteMemory *& memory)
{
  Join * bef = (Join *) before;

  // if nested join excluded by heuristics
  //if (NOT bef->canTransformToTSJ()) return NULL;

  // pick the TSJ operator type matching the join flavor (semi, anti,
  // left, etc.)
  OperatorTypeEnum tsjOpType;
  tsjOpType = bef->getTSJJoinOpType();

  // Make sure we call the copyTopNode for the Join.
  Join *result = (Join *)Rule::nextSubstitute(before,NULL,memory);
  result->setOperatorType(tsjOpType);

  // If the join came from a RoutineJoin, remember that.
  if (bef->derivedFromRoutineJoin())
    result->setDerivedFromRoutineJoin();

  // now set the group attributes of the result's top node
  result->setGroupAttr(before->getGroupAttr());

  // transfer all the join fields to the new NestedJoin
  (void) bef->copyTopNode(result,CmpCommon::statementHeap());
  result->resolveSingleColNotInPredicate();

  // Recompute the input values that each child requires as well as
  // the output values that each child is capable of producing
  result->allocateAndPrimeGroupAttributes();

  // Push down as many full or partial expressions as can be
  // computed by the children.
  result->pushdownCoveredExpr(
       result->getGroupAttr()->getCharacteristicOutputs(),
       result->getGroupAttr()->getCharacteristicInputs(),
       result->selectionPred());

  // If the right child does not have any predicates on its filter,
  // then this is a cartesian product. Cartesian products are
  // best implemented as hash joins, so just return NULL.
  RelExpr *filter = result->child(1);

  // if the heuristics say the TSJ is not worthwhile (and no stream or
  // embedded update/delete forces it), block all further rules on the
  // filter's group so no plan is generated from it
  if (NOT bef->canTransformToTSJ()
      // QSTUFF
      AND NOT result->getGroupAttr()->isEmbeddedUpdateOrDelete()
      AND NOT result->getGroupAttr()->isStream()
      // QSTUFF
     )
  {
    filter->contextInsensRules() = *GlobalRuleSet->applicableRules();
  }

  // Eliminate Filter node if no predicates were added to it
  if (filter->selectionPred().isEmpty())
  {
    result->child(1) = filter->child(0);
    filter->deleteInstance();
    filter = NULL;
  }

  // Eliminate Filter node if no NEW predicates were added to it
  if (filter)
  {
    // Check to see if the filter node is adding any new characteristic
    // inputs (i.e. outer references). If not, no true join predicates were
    // pushed down. So, eliminate this filter.
    if( (filter->getGroupAttr()->getCharacteristicInputs() ==
         filter->child(0).getGroupAttr()->getCharacteristicInputs()) AND
        (filter->getGroupAttr()->getCharacteristicOutputs() ==
         filter->child(0).getGroupAttr()->getCharacteristicOutputs()) )
    {
      result->child(1) = filter->child(0);
      filter->deleteInstance();
      filter = NULL;
    }
  }

  // Allow cartesian products for antiJoins or if Hash joins are
  // disabled
  NABoolean hashJoinsEnable = TRUE;
  if (CmpCommon::getDefault(HASH_JOINS) == DF_OFF)
    hashJoinsEnable = FALSE;

  // nested Join is generally not used for x-products unless the
  // NESTED_JOIN_FOR_CROSS_PRODUCTS default is ON
  if (NOT CURRSTMT_OPTDEFAULTS->nestedJoinForCrossProducts() AND
      NOT filter AND hashJoinsEnable AND NOT bef->isAntiSemiJoin() AND
      // QSTUFF
      // we really favor nested joins in case of streams and embedded
      // embedded updates, so lets retain them
      NOT result->getGroupAttr()->isStream() AND
      NOT result->getGroupAttr()->isEmbeddedUpdateOrDelete()
      // QSTUFF
     )
  {
    result->deleteInstance();
    return NULL;
  }

  // synthesize logical properties for my new right child
  if (filter)
  {
    filter->synthLogProp();
    filter->getGroupAttr()->addToAvailableBtreeIndexes(
         filter->child(0).getGroupAttr()->getAvailableBtreeIndexes());
    if (before->getInliningInfo().isMaterializeNodeForbidden())
      filter->getInliningInfo().setFlags(II_MaterializeNodeForbidden);

    // if the new join is probe cacheable set the right child as probe cacheable too
    if (((NestedJoin*)result)->isProbeCacheApplicable(EXECUTE_IN_MASTER_OR_ESP))
      result->child(1).getGroupAttr()->setIsProbeCacheable();
  }

  // First retain a copy of the equal join expression so that OCR can
  // use it to check if the equi join expression completely covers
  // right child's partitioning key.
  result->setOriginalEquiJoinExpressions(bef->getEquiJoinExpressions());

  // For a TSJ all join predicates have been push down to the
  // second child. We need to clear the equiJoinPredicates.
  result->findEquiJoinPredicates();

  // No need to apply transformation rules on TSJ (since they
  // have already been applied to the JOIN node). Therefore,
  // mark that we have already tried all transformation rules.
  // For transformation rules that look beyond the top node (ex. left-shift
  // rule), we provide an additional method isTransformComplete() to
  // indicate that this subtree should not be transformed further.
  result->contextInsensRules() += GlobalRuleSet->transformationRules();
  result->setTransformComplete();

  // If the before join resulted from a application of MJPrimeTableRule, directly
  // or indirectly, we set the result join to be also from MJPrimeTableRule.
  if (bef->isJoinFromPTRule())
    result->setJoinFromPTRule();

  return result;
}
Int32 JoinToTSJRule::promiseForOptimization(RelExpr * ,
                                            Guidance * ,
                                            Context * )
{
  // Transforming logical joins to logical TSJs enables the physical
  // NestedJoin rule to fire, so bias this rule's promise downward to
  // make it fire before the other join transformation rules.
  const Int32 fireEarlyBias = 1000; /* was: - 1000 (+ 1000 disabled) */
  return (Int32) (DefaultTransRulePromise - fireEarlyBias);
}
// Returns TRUE if the J->TSJ transformation may be pruned for 'expr'.
// An AntiSemiJoin is never pruned, because in many cases a TSJ is its
// only possible implementation.
NABoolean JoinToTSJRule::canBePruned(RelExpr * expr) const
{
  if (expr->getOperator().match(REL_ANY_ANTI_SEMIJOIN))
    return FALSE;
  return TRUE;
}
// -----------------------------------------------------------------------
// methods for TSJFlow rule
// -----------------------------------------------------------------------
// Decide whether the TSJFlow rule applies to 'expr' (a GenericUpdate).
// Returns FALSE for CALL statements, for updates that requested no tuple
// flow, and (under COMP_INT_74 = 1) for cursor-delete shapes that have no
// alternate index access path.
NABoolean TSJFlowRule::topMatch (RelExpr * expr,
                                 Context * context)
{
  if (NOT Rule::topMatch(expr, context))
    return FALSE;

  // We can come here for a CALL statement also; ensure that
  // this rule is not fired for CALL.
  if (REL_CALLSP == expr->getOperatorType())
    return FALSE;

  GenericUpdate * updateExpr = (GenericUpdate *) expr;
  if (updateExpr->getNoFlow())
    return FALSE;

  // Sol 10-091202-6798, do not try cursor_delete plan if:
  // A. comp_int_74 = 1 (default value is 0) AND rightChild is a DELETE operator
  // B. table being scanned is same as table being deleted.
  // C. scan doesn't have any alternate index access paths.

  // Does it meet condition A?
  if ( ((ActiveSchemaDB()->getDefaults()).getAsLong(COMP_INT_74) == 1) AND
       (updateExpr->getOperatorType() == REL_UNARY_DELETE) )
  {
    Delete * del = (Delete *) expr;
    RelExpr * c0 = expr->child(0).getGroupAttr()->getLogExprForSynthesis();

    // Guard against a missing synthesized log expr BEFORE dereferencing
    // it (the original code tested for NULL only after the first use).
    // A non-scan (or absent) child means the cursor-delete restriction
    // does not apply; let the rule fire.
    if ( c0 == NULL OR c0->getOperatorType() != REL_SCAN )
      return TRUE;

    Scan * scan = (Scan *)c0;

    // Does it meet condition B?
    // Check if the table scanned is the same as that being deleted.
    if ( scan->getTableDesc()->getNATable() == del->getTableDesc()->getNATable() )
    {
      // Go through all indexes and find out in which way they could
      // be used, if this is not already done (needed for next step).
      scan->addIndexInfo();

      // Does it meet condition C?
      // Don't apply the rule if there isn't at least one alternate index:
      // cursor_delete doesn't make any sense, subset_delete is better.
      if ((scan->getIndexOnlyIndexes().entries() +
           scan->getPossibleIndexJoins().entries()) < 2)
        return FALSE;
    }
  }
  return TRUE;
}
// NOTE(review): the old "##IM" question is resolved — this rule DOES call pushdownCoveredExpr in its body below.
// Transform a unary GenericUpdate (INSERT/UPDATE/DELETE) into a TSJ_FLOW
// join whose right child is the corresponding "leaf" generic update
// operator.  Returns NULL (the rule does not fire) when the update
// produces characteristic outputs, since a tuple flow cannot return rows.
RelExpr * TSJFlowRule::nextSubstitute (RelExpr * before,
                                       Context * /*context*/,
                                       RuleSubstituteMemory *& /*memory*/)
{
  // Create a new "leaf" generic update expression from the old
  // unary generic update expression.
  GenericUpdate * oldUpdate = (GenericUpdate *) before;

  if (oldUpdate->getGroupAttr()->getCharacteristicOutputs().entries())
  {
    // The update returns rows; a tuple flow cannot do that.
    return NULL;
  }

  RelExpr * newUpdate = oldUpdate->copyTopNode(0,CmpCommon::statementHeap());

  // Map the unary update operator type to its leaf counterpart.
  OperatorTypeEnum newOptype = NO_OPERATOR_TYPE;
  switch (before->getOperatorType())
  {
    case REL_UNARY_INSERT:
      newOptype = REL_LEAF_INSERT;
      break;
    case REL_UNARY_UPDATE:
      newOptype = REL_LEAF_UPDATE;
      break;
    case REL_UNARY_DELETE:
      newOptype = REL_LEAF_DELETE;
      break;
    default:
      ABORT ("Internal error: TSJFlowRule::nextSubstitute");
      break;
  }
  newUpdate->setOperatorType (newOptype);

  // Create the new TSJFlow expression.
  Join * result = new (CmpCommon::statementHeap())
    Join(before->child(0),
         newUpdate,
         REL_TSJ_FLOW,
         NULL,
         FALSE,
         FALSE,
         CmpCommon::statementHeap(),
         oldUpdate->getTableDesc(),
         new (CmpCommon::statementHeap()) ValueIdMap(
              oldUpdate->updateToSelectMap()));

  // This TSJ is for a write operation.
  result->setTSJForWrite(TRUE);

  // TSJ for self-referencing updates need special treatment.
  oldUpdate->configTSJforHalloween( result, newOptype,
      before->child(0).getGroupAttr()->getResultCardinalityForEmptyInput());

  // For Neo R2 compatibility:
  // Transfer the avoidHalloweenR2 flag from the update to the join. If
  // this flag is set, then we must generate a safe plan WRT the
  // Halloween problem. For the TSJForWrite Join the plan should have
  // a SORT operator on the LHS of the Tuple Flow. This causes the
  // source to be blocked.
  result->setAvoidHalloweenR2(oldUpdate->avoidHalloweenR2());

  // Now set the group attributes of the result's top node.
  result->setGroupAttr(before->getGroupAttr());

  // Recompute the input values that each child requires as well as
  // the output values that each child is capable of producing.
  result->allocateAndPrimeGroupAttributes();

  // Output values produced by the left child of the tsjFlow
  // become the inputs for the right child of the tsjFlow.
  RelExpr * rightChild = result->child(1);
  rightChild->getGroupAttr()->addCharacteristicInputs
    (before->child(0).getGroupAttr()->getCharacteristicOutputs());

  // Push down as many full or partial expressions as can be
  // computed by the children.
  result->pushdownCoveredExpr(result->getGroupAttr()->getCharacteristicOutputs(),
                              result->getGroupAttr()->getCharacteristicInputs(),
                              result->selectionPred());

  // Synthesize logical properties for this new leaf generic update node.
  ((GenericUpdate *)rightChild)->synthLogProp();

  result->setBlockStmt( before->isinBlockStmt() );

  if (rightChild->getOperatorType() == REL_LEAF_INSERT)
  {
    // If my right child is an insert node, then buffered inserts are
    // allowed. The kind of buffered inserts (VSBB or sidetree) are
    // chosen in DP2InsertCursorRule::nextSubstitute. At that time,
    // buffered inserts may be disabled due to other conditions
    // (see that method).
    Insert * ins = (Insert *)(rightChild->castToRelExpr());
    ins->bufferedInsertsAllowed() = TRUE;

    // Transfer any required order from the insert node to the TSJ node.
    // There will only be a req. order if the user specified an ORDER BY clause.
    result->setReqdOrder(ins->reqdOrder());

    // Transfer the sidetree insert indicator to the join.
    //
    // Later on in NJ::createContextForChild(), plans that do not sort
    // the data from the outer side will be disabled.
    //
    // Also transfer the sidetree insert flag to OptDefaults so that MJs can use it.
    if ( ins->isSideTreeInsertFeasible() ) {
      result->setTSJForSideTreeInsert(TRUE);
      CURRSTMT_OPTDEFAULTS->setSideTreeInsert(TRUE);
    }
    if (ins->enableTransformToSTI())
      result->setEnableTransformToSTI(TRUE);

    result->setIsForTrafLoadPrep(ins->getIsTrafLoadPrep());
  }
  return result;
}
// -----------------------------------------------------------------------
// methods for TSJ rule
// -----------------------------------------------------------------------
// Decide whether the TSJ rule applies to 'expr' (a GenericUpdate).
NABoolean TSJRule::topMatch (RelExpr * expr,
                             Context * context)
{
  if (NOT Rule::topMatch(expr, context))
    return FALSE;

  // We can come here for a CALL statement also; this rule must
  // never fire for CALL.
  if (expr->getOperatorType() == REL_CALLSP)
    return FALSE;

  GenericUpdate * gu = (GenericUpdate *) expr;

  // Only applicable when the update asked for no tuple flow.
  if (NOT gu->getNoFlow())
    return FALSE;

  // No FirstN on top of a TSJ: an MTS DELETE statement disqualifies.
  if ((gu->getOperatorType() == REL_UNARY_DELETE) AND gu->isMtsStatement())
    return FALSE;

  // It is not semantically correct to convert a MERGE having a
  // "NOT MATCHED" action to a TSJ, since the former has right
  // join semantics. (If we converted here to a TSJ, a non-matching
  // row would not be returned by the outer child scan node, so
  // the inner child merge node would never see it and hence the
  // "NOT MATCHED" logic would not be activated.)
  if (gu->isMerge() AND gu->insertValues())
    return FALSE;

  return TRUE;
}
//
// Right now the VSBBInsert's are disabled in the
// DP2InsertCursorRule::nextSubstitute() method if there are any
// indexes on the table. With the new IM code we need to enable
// VSBB inserts for the original insert which is on the left subtree
// and test inserts. With the new transformation rule if the insert
// has output values it will choose NJ as the parent of the insert.
// But if optimizer chooses the insert to be a VSBBInsert then the
// combination of VSBBInsert with parent as NJ would not work.
// Because VSBBInsert always assumes that the parent is a TupleFlow.
// So may be the new rule may not be right. In that case we need
// to make changes to the TupleFlow in the executor to return rows
// and remove the new rule.
//
// Also:
// Enabling VSBBInserts when there are indexes on the table,
// Enabling Subsets for Delete's and Update's and testing.
//
// ##IM: ?? This rule is not calling pushdownCoveredExpr. Should it?
//
// Transform a unary GenericUpdate (INSERT/UPDATE/DELETE) that may return
// rows into a TSJ whose right child is the corresponding "leaf" generic
// update operator.
RelExpr * TSJRule::nextSubstitute (RelExpr * before,
                                   Context * /*context*/,
                                   RuleSubstituteMemory *& /*memory*/)
{
  // Create a new "leaf" generic update expression from the old
  // unary generic update expression.
  GenericUpdate * oldUpdate = (GenericUpdate *) before;
  RelExpr * newUpdate = oldUpdate->copyTopNode(0,CmpCommon::statementHeap());

  // Map the unary update operator type to its leaf counterpart.
  OperatorTypeEnum newOptype = NO_OPERATOR_TYPE;
  switch (before->getOperatorType())
  {
    case REL_UNARY_INSERT:
      newOptype = REL_LEAF_INSERT;
      break;
    case REL_UNARY_UPDATE:
      newOptype = REL_LEAF_UPDATE;
      break;
    case REL_UNARY_DELETE:
      newOptype = REL_LEAF_DELETE;
      break;
    default:
      ABORT ("Internal error: TSJRule::nextSubstitute");
      break;
  }
  newUpdate->setOperatorType (newOptype);

  // Create the new TSJ expression.
  Join * result = new (CmpCommon::statementHeap())
    Join(before->child(0),
         newUpdate,
         REL_TSJ,
         NULL,
         FALSE,
         FALSE,
         CmpCommon::statementHeap(),
         oldUpdate->getTableDesc(),
         new (CmpCommon::statementHeap()) ValueIdMap(
              oldUpdate->updateToSelectMap()));

  // This TSJ is for a write operation.
  result->setTSJForWrite(TRUE);

  // Do not allow this TSJ to run inside DP2 if it is created for a MV INSERT
  // to avoid possible DP2 starvation (e.g., when the source is a SELECT).
  if ((before->getInliningInfo()).isMVLoggingInlined() == TRUE)
    result->setAllowPushDown(FALSE);

  // TSJ for self-referencing updates need special treatment.
  oldUpdate->configTSJforHalloween( result, newOptype,
      before->child(0).getGroupAttr()->getResultCardinalityForEmptyInput());

  // Now set the group attributes of the result's top node.
  result->setGroupAttr(before->getGroupAttr());

  // QSTUFF
  // in case we force a cursor update to get a place holder
  // for the outer selection predicates pushed down to the
  // generic update tree we have to set the selection
  // predicates at the new root of the generic update tree
  if (result->getGroupAttr()->isGenericUpdateRoot())
    result->selectionPred() += before->getSelectionPred();
  // QSTUFF

  // Recompute the input values that each child requires as well as
  // the output values that each child is capable of producing.
  result->allocateAndPrimeGroupAttributes();

  RelExpr * rightChild = result->child(1);

  // QSTUFF
  // a leaf can only produce new or old values in case of an update or delete
  // but the copyTopNode method has set the potentialOutputs to both the new
  // and the old values. This causes primeGroupAttr to set the characteristic
  // outputs of the leaf operator to both the old and new values, this causes
  // problems in precodegen as precode gen does not find any of them
  // being produced.
  // We therefore set the characteristic output of the new leaf to what the
  // old unary operator could produce..and pray..
  // for a final solution one would have to correct the way potentialOutputs
  // are set
  rightChild->getGroupAttr()->setCharacteristicOutputs
    (before->getGroupAttr()->getCharacteristicOutputs());
  // QSTUFF

  // In the case of an embedded insert with a selection predicate,
  // we need to retrieve the stored available outputs
  // from the GenericUpdate group attr.
  if (result->getGroupAttr()->isEmbeddedInsert() &&
      !result->selectionPred().isEmpty() &&
      rightChild->getArity() > 0)
  {
    ValueIdSet availableGUOutputs;
    rightChild->child(0)->getInputAndPotentialOutputValues(availableGUOutputs);
    result->getGroupAttr()->addCharacteristicOutputs(availableGUOutputs);
  } // End of embedded insert setting of characteristic outputs

  // Output values produced by the left child become inputs
  // for the right (leaf update) child.
  rightChild->getGroupAttr()->addCharacteristicInputs
    (before->child(0).getGroupAttr()->getCharacteristicOutputs());

  // Synthesize logical properties for this new leaf generic update node.
  ((GenericUpdate *)rightChild)->synthLogProp();

  // QSTUFF
  // we need the unique properties to be set
  result->setCursorUpdate(TRUE);
  // QSTUFF

  result->setBlockStmt( before->isinBlockStmt() );

  if (rightChild->getOperatorType() == REL_LEAF_INSERT)
  {
    Insert * ins = (Insert *)(rightChild->castToRelExpr());

    // MV -
    // Allow VSBB Inserts even when its outputs are used for IM etc.
    ins->bufferedInsertsAllowed() = TRUE;

    // Transfer any required order from the insert node to the TSJ node.
    // There will only be a req. order if the user specified an ORDER BY clause.
    result->setReqdOrder(ins->reqdOrder());

    result->setIsForTrafLoadPrep(ins->getIsTrafLoadPrep());
  }
  return result;
}
// -----------------------------------------------------------------------
// methods for FilterRule
// -----------------------------------------------------------------------
// Supply the guidance used while exploring a filter's child group.
// Returns a fresh FilterGuidance allocated on the statement heap.
Guidance * FilterRule::guidanceForExploringChild(Guidance *,
                                                 Context *,
                                                 Lng32)
{
  Guidance *guidance = new (CmpCommon::statementHeap())
                         FilterGuidance(CmpCommon::statementHeap());
  return guidance;
}
// Collapse Filter(leaf): clone the tree below the filter, give it the
// filter's group attributes, and fold the filter's predicates into it.
RelExpr * FilterRule0::nextSubstitute(RelExpr * before,
                                      Context *,
                                      RuleSubstituteMemory * &)
{
  Filter  * filterNode = (Filter *) before;
  RelExpr * childExpr  = before->child(0);

  CMPASSERT(before->getOperatorType() == REL_FILTER);

  // Clone the tree underneath the filter.
  RelExpr * substitute = getSubstitute()->copyTree(CmpCommon::statementHeap());

  // The substitute takes over the filter's group attributes and
  // absorbs the filter's selection predicates.
  substitute->setGroupAttr(before->getGroupAttr());
  substitute->selectionPred() += filterNode->selectionPred();

  // If the node below the filter was a scan, recompute its index
  // information now that predicates have changed.
  if (childExpr->getOperatorType() == REL_SCAN)
    ((Scan *)substitute)->addIndexInfo();

  return substitute;
}
// Push a Filter's predicates down through a unary child node.  The
// substitute pattern re-introduces a Filter below the unary node to hold
// whatever predicates cannot be absorbed; that lower Filter is spliced
// out again whenever it turns out to be redundant.
RelExpr * FilterRule1::nextSubstitute(RelExpr * before,
Context * /*context*/,
RuleSubstituteMemory * &)
{
RelExpr * result;
// the unary node below the filter in the "before" pattern
RelExpr * unaryNode = before->child(0);
ValueIdSet newPredicates;
CMPASSERT(before->getOperatorType() == REL_FILTER);
// copy the tree underneath the filter
result = getSubstitute()->copyTree(CmpCommon::statementHeap());
// If the child of the filter is itself a filter, then just return NULL.
// Since children groups are explored first, allow the child filter
// node to be collapsed first prior to applying this rule.
//
// NOTE: The above statement is not true. Due to a bug in Cascades,
// the child filter group is NOT explored first. An attempt was
// made to get this to work by adding a kind of guidance called
// "FilterGuidance". This caused worse problems. So, the two lines
// below that return if the child of the filter is a filter were
// removed. This caused worse problems, too. An attempt was made
// to solve those problems but that caused even worse problems.
// So, for now, we have to live with the original problem, which
// is documented in CR# 10-001006-2815. To learn about the
// "worse" problems and the attempted fixes, see CR# 10-001204-9989
// and CR# 10-010104-0497.
if (result->getOperatorType() == REL_FILTER)
return NULL;
// now set the group attributes of the result's top node
result->setGroupAttr(before->getGroupAttr());
// Find out if there are any new predicates to push down
newPredicates = before->selectionPred();
newPredicates -= result->selectionPred();
// NOTE: even if there are no new predicates, if the char. inputs of
// the result node are different from the char. inputs of the
// original node "unaryNode", then we still need to perform
// predicate pushdown. This is because additional inputs can change the
// meaning of a VEGPredicate in a node (instead of being an IS NOT NULL
// predicate, the predicate now compares two VEG members).
if (NOT newPredicates.isEmpty() OR
unaryNode->getGroupAttr()->getCharacteristicInputs() !=
result->getGroupAttr()->getCharacteristicInputs())
{
// push down to the filter node below result as many full or
// partial expressions as can be computed by the (Filter) child.
result->selectionPred() += newPredicates;
// recompute the input values that each child of result requires
// as well as the output values that each child is capable of
// producing.
result->allocateAndPrimeGroupAttributes();
// do the pushdown, which has the side-effect of recomputing
// the correct characteristic inputs and outputs of the child
result->pushdownCoveredExpr(
result->getGroupAttr()->getCharacteristicOutputs(),
result->getGroupAttr()->getCharacteristicInputs(),
result->selectionPred());
}
// If no predicates were placed on the filter
// node below result then remove the filter node.
// ('filter' is the Filter the substitute pattern placed below result.)
Filter * filter = (Filter *)(result->child(0)->castToRelExpr());
if (((Filter *)before)->reattemptPushDown()) filter->setReattemptPushDown();
if (filter->selectionPred().isEmpty())
{
result->child(0) = filter->child(0).getPtr();
filter->deleteInstance();
}
else // new filter has predicates
{
// If no new predicates could be pushed to the filter node
// delete it.
// (The filter is redundant when it neither consumes nor produces
// anything different from its own child, unless a pushdown retry
// was requested.)
if( (filter->getGroupAttr()->getCharacteristicInputs() ==
filter->child(0).getGroupAttr()->getCharacteristicInputs()) AND
(filter->getGroupAttr()->getCharacteristicOutputs() ==
filter->child(0).getGroupAttr()->getCharacteristicOutputs()) AND
(NOT filter->reattemptPushDown()) )
{
result->child(0) = filter->child(0).getPtr();
filter->deleteInstance();
}
else // new filter predicates are new
{
// If the original unary node was a filter, get rid of it. It
// should no longer be necessary, since we are saving the new one.
if (result->getOperatorType() == REL_FILTER)
{
Filter* resultAsFilter = (Filter *)(result->castToRelExpr());
// Make sure the unary node filter - i.e. the original
// lower level filter - gave all it's predicates to the
// filter we just pushed down.
DCMPASSERT(resultAsFilter->selectionPred().isEmpty());
// Get rid of the original lower level filter node by making the
// pushed-down filter (result's child) the new result.
result = filter;
resultAsFilter->deleteInstance();
}
// synthesize logical properties for this new node.
filter->synthLogProp();
} // end if new filter preds are new
} // end if new filter has predicates
return result;
}
RelExpr * FilterRule2::nextSubstitute(RelExpr * before,
Context * /*context*/,
RuleSubstituteMemory * &)
{
RelExpr * result;
RelExpr * binaryNode = before->child(0);
ValueIdSet newPredicates;
NABoolean recomputeEquiJoinPred = FALSE; //Sol no:10-030731-8378
// Solution 10-031107-1132: due to a group merge, Filter and Filter(cut)
// may end up in the same group. In that case, do not apply this rule
if ((before->getGroupId() != INVALID_GROUP_ID) &&
(before->getGroupId() == binaryNode->getGroupId())
)
return NULL;
CMPASSERT(before->getOperatorType() == REL_FILTER);
// copy the tree underneath the filter
result = getSubstitute()->copyTree(CmpCommon::statementHeap());
// now set the group attributes of the result's top node
result->setGroupAttr(before->getGroupAttr());
RelExpr * leftChild = result->child(0);
RelExpr * rightChild = result->child(1);
// recompute the input values that each child of result requires
// as well as the output values that each child is capable of
// producing.
result->allocateAndPrimeGroupAttributes();
// Find out if there are any new predicates to push down
newPredicates = before->selectionPred();
newPredicates -= result->selectionPred();
// NOTE: even if there are no new predicates, if the char. inputs of
// the result node are different from the char. inputs of the
// original node "binaryNode", then we still need to perform
// predicate pushdown. This is because additional inputs can change the
// meaning of a VEGPredicate in a node (instead of being an IS NOT NULL
// predicate, the predicate now compares two VEG members).
if (NOT newPredicates.isEmpty() OR
binaryNode->getGroupAttr()->getCharacteristicInputs() !=
result->getGroupAttr()->getCharacteristicInputs())
{
// push down to the filter node below result as many full or
// partial expressions as can be computed by the (Filter) child.
result->selectionPred() += newPredicates;
result->pushdownCoveredExpr(
result->getGroupAttr()->getCharacteristicOutputs(),
result->getGroupAttr()->getCharacteristicInputs(),
result->selectionPred() );
// If the binary node in the before pattern is a Join, then we
// need to recompute the equijoin predicates in the result, since
// we now have changed the predicates attached to the result.
// We also need to set the joinFromPTRule_ flag of result if set
// in the binaryNode
// Sol no:10-030731-8378-> commented out the equi-join
// check.Do it after the removal of the redundant filters, if any.
recomputeEquiJoinPred = TRUE;
//<-Sol no:10-030731-8378
}
if (binaryNode->getOperator().match(REL_ANY_JOIN))
{
// If the binaryNode join resulted from a application of MJPrimeTableRule, directly
// or indirectly, we set the result join to be also from MJPrimeTableRule.
if (((Join *)binaryNode)->isJoinFromPTRule())
((Join *)result)->setJoinFromPTRule();
}
// If no predicates were placed on the filter nodes below result
// then remove them else synthesis their logical properties.
Filter * filter = (Filter *)(result->child(0)->castToRelExpr());
if (((Filter *)before)->reattemptPushDown()) filter->setReattemptPushDown();
if (filter->selectionPred().isEmpty())
{
result->child(0) = filter->child(0).getPtr();
filter->deleteInstance();
}
else
{
// If no new predicates could be pushed to the filter node
// delete it.
if( (filter->getGroupAttr()->getCharacteristicInputs() ==
filter->child(0).getGroupAttr()->getCharacteristicInputs()) AND
(filter->getGroupAttr()->getCharacteristicOutputs() ==
filter->child(0).getGroupAttr()->getCharacteristicOutputs()) AND
(NOT filter->reattemptPushDown()) )
{
result->child(0) = filter->child(0).getPtr();
filter->deleteInstance();
}
else
// synthesize logical properties for this new node.
filter->synthLogProp();
}
filter = (Filter *)(result->child(1)->castToRelExpr());
if (((Filter *)before)->reattemptPushDown()) filter->setReattemptPushDown();
if (filter->selectionPred().isEmpty())
{
result->child(1) = filter->child(0).getPtr();
filter->deleteInstance();
}
else
{
// If no new predicates could be pushed to the filter node
// delete it.
if( (filter->getGroupAttr()->getCharacteristicInputs() ==
filter->child(0).getGroupAttr()->getCharacteristicInputs()) AND
(filter->getGroupAttr()->getCharacteristicOutputs() ==
filter->child(0).getGroupAttr()->getCharacteristicOutputs()) AND
(NOT filter->reattemptPushDown()) )
{
result->child(1) = filter->child(0).getPtr();
filter->deleteInstance();
}
else
// synthesize logical properties for this new node.
filter->synthLogProp();
}
//Sol no:10-030731-8378->
if (recomputeEquiJoinPred)
{
if (binaryNode->getOperator().match(REL_ANY_JOIN))
((Join *)result)->findEquiJoinPredicates();
}
//<-Sol no:10-030731-8378
return result;
}
// Promise of the filter-elimination rules.
Int32 FilterRule::promiseForOptimization(RelExpr *,
                                         Guidance *,
                                         Context *)
{
  // Eliminating a filter has the highest promise, higher than any
  // enforcer rule (only enforcer rules compete with this rule anyway).
  const Int32 promise = AlwaysBetterPromise;
  return promise;
}
// -----------------------------------------------------------------------
// methods for class GroupByMVQRRule
// -----------------------------------------------------------------------
// Decide whether the MVQR (materialized-view query rewrite) rule applies
// to a groupby/aggregate node.
NABoolean GroupByMVQRRule::topMatch(RelExpr * expr,
                                    Context * context)
{
  if (NOT Rule::topMatch(expr, context))
    return FALSE;

  const OperatorTypeEnum op = expr->getOperatorType();
  if ((op != REL_GROUPBY) && (op != REL_AGGREGATE))
    return FALSE;

  // For optimization levels below MEDIUM we do not run the GroupByMVQRRule.
  if (CURRSTMT_OPTDEFAULTS->optLevel() < OptDefaults::MEDIUM)
    return FALSE;

  // The rule is disabled via the MVQR_REWRITE_LEVEL default.
  if (CmpCommon::getDefaultLong(MVQR_REWRITE_LEVEL) < 1)
    return FALSE;

  GroupByAgg *grby = (GroupByAgg *) expr;

  // A GroupByAgg created by the GroupBySplitRule (a partial groupby)
  // must not have further transformation rules applied to it.
  if (NOT grby->isNotAPartialGroupBy())
    return FALSE;

  // Likewise for GroupByAggs created by the AggrDistinctEliminationRule
  // or the GroupByOnJoinRule.
  if (grby->aggDistElimRuleCreates() || grby->groupByOnJoinRuleCreates())
    return FALSE;

  // Fire only if there are matching MVs for this JBBsubset and the
  // first match has not already been optimized.
  JBBSubsetAnalysis* subsetAnalysis = grby->getJBBSubsetAnalysis();
  if (subsetAnalysis == NULL)
    return FALSE;
  if (subsetAnalysis->getMatchingMVs().entries() == 0)
    return FALSE;
  if (subsetAnalysis->getMatchingMVs()[0]->alreadyOptimized())
    return FALSE;

  return TRUE;
}
// Generate one substitute per matching MV.  On the first call all
// matching MV RelExpr trees are collected into a RuleSubstituteMemory;
// each subsequent call hands out the next one until the memory is
// exhausted, at which point it is deleted and NULL is returned.
RelExpr * GroupByMVQRRule::nextSubstitute(RelExpr * before,
Context * /*context*/,
RuleSubstituteMemory * & memory)
{
RelExpr *result = NULL;
CMPASSERT((before->getOperatorType() == REL_GROUPBY) || (before->getOperatorType() == REL_AGGREGATE));
GroupByAgg *grby = (GroupByAgg *) before;
if (memory == NULL)
{
// -----------------------------------------------------------------
// this is the first call, create info on all matches
// -----------------------------------------------------------------
// NOTE(review): matches are taken here from the main JBBSubset's
// analysis, while topMatch() consulted the groupby's own
// JBBSubsetAnalysis — confirm both refer to the same match list.
JBBSubsetAnalysis* jbbSubsetAnal = grby->getGBAnalysis()->getJBB()->getMainJBBSubset().getJBBSubsetAnalysis();
CollIndex numMatches = jbbSubsetAnal->getMatchingMVs().entries();
if (numMatches)
{
// allocate a new memory for multiple substitutes
memory = new (CmpCommon::statementHeap())
RuleSubstituteMemory(CmpCommon::statementHeap());
for (CollIndex matchIndex = 0; matchIndex < numMatches; matchIndex++)
{
// get the next match
MVMatchPtr match = jbbSubsetAnal->getMatchingMVs()[matchIndex];
// mark the match so topMatch() will not fire on it again
match->setAlreadyOptimized();
RelExpr *matchExpr = match->getMvRelExprTree();
// now set the group attributes of the result's top node
matchExpr->setGroupAttr(before->getGroupAttr());
// insert the match into the substitute memory
memory->insert(matchExpr);
} // for each match
} // if numMatches
} // memory == NULL
// ---------------------------------------------------------------------
// handle case of multiple substitutes
// ---------------------------------------------------------------------
if (memory != NULL)
{
result = memory->getNextSubstitute();
if (result == NULL)
{
// returned all the substitutes
// now delete the substitute memory, so we won't be called again
delete memory;
memory = NULL;
}
else
{
// synth the MVI
result->synthLogProp();
}
// return the next retrieved substitute
return result;
}
else
return NULL; // rule didn't fire
}
// -----------------------------------------------------------------------
// methods for class GroupByEliminationRule
// -----------------------------------------------------------------------
// Out-of-line empty destructor for GroupByEliminationRule.
GroupByEliminationRule::~GroupByEliminationRule() {}
// Decide whether a groupby can be eliminated outright: the child must
// already be unique in the grouping columns.
NABoolean GroupByEliminationRule::topMatch(RelExpr * expr,
                                           Context * context)
{
  if (NOT Rule::topMatch(expr, context))
    return FALSE;

  if (expr->getOperatorType() == REL_SHORTCUT_GROUPBY)
    return FALSE;

  // This rule only ever sees groupby nodes.
  CMPASSERT(expr->getOperatorType() == REL_GROUPBY);
  GroupByAgg *groupBy = (GroupByAgg *) expr;

  // ---------------------------------------------------------------------
  // A GroupByAgg created by the GroupBySplitRule (which replaces
  // GB(CutOp) with GB(GB(CutOp))) must not have transformation rules
  // applied to it again; each GroupByAgg in that substitute is subject
  // to implementation rules only.
  // ---------------------------------------------------------------------
  if (NOT groupBy->isNotAPartialGroupBy())
    return FALSE;

  // Never eliminate an aggregate node (no grouping columns): that would
  // break the empty-input case, and it wouldn't save any cost anyway.
  if (groupBy->groupExpr().isEmpty())
    return FALSE;

  // Do not eliminate the groupby if a rollup is being done.
  if (groupBy->isRollup())
    return FALSE;

  // Eliminate only when the child is unique in the grouping columns.
  return groupBy->child(0).getGroupAttr()->isUnique(groupBy->groupExpr());
}
// Promise of groupby elimination.
Int32 GroupByEliminationRule::promiseForOptimization(RelExpr *,
                                                     Guidance *,
                                                     Context *)
{
  // Highest priority: a plan without the groupby is always faster
  // than one with it.
  const Int32 promise = AlwaysBetterPromise;
  return promise;
}
// Build the substitute that replaces an eliminable groupby.  Depending
// on whether aggregates must be rewritten and whether HAVING predicates
// exist, the substitute is a bare cut, a Filter, a MapValueIds, or a
// MapValueIds over a Filter (cases a-d below).
RelExpr * GroupByEliminationRule::nextSubstitute(RelExpr * before,
Context * /*context*/,
RuleSubstituteMemory * & memory)
{
// the substitute in the rule is simply a Cut operator
RelExpr *result = getSubstitute()->copyTree(CmpCommon::statementHeap());
CMPASSERT(before->getOperatorType() == REL_GROUPBY);
GroupByAgg *grby = (GroupByAgg *) before;
// ---------------------------------------------------------------------
// Make four different substitutes, depending on whether expressions
// need to be rewritten and whether there are HAVING predicates:
//
// a) cut ...if no expressions need to be rewritten, if
// there are no HAVING predicates, and if the group
// attributes of the result are the same as those
// of the cut
//
// b) filter ...if there is a HAVING predicate, but no expressions
// | need to be rewritten, OR if the cut operator's group
// cut attributes are not the same as the result's group
// attributes,
//
// c) mvi ...if there is no HAVING predicate, but expressions
// | need to be rewritten,
// cut
//
// d) mvi ...you guessed it: if there are HAVING predicates
// | and if expressions need to be rewritten.
// filter
// |
// cut
//
// ---------------------------------------------------------------------
// If there are no aggregates, then we know that no expressions need to
// be rewritten and that we can safely return the cut node (or a
// node with the same group attributes) as the result.
if (grby->aggregateExpr().isEmpty())
{
// if there is a having predicate or if the group attributes of
// the child are different from the result's group attributes, then
// create a filter node on top of the result so far
if (NOT (grby->selectionPred().isEmpty() AND
*(grby->getGroupAttr()) == *(grby->child(0).getGroupAttr())))
{
// case b)
result = new (CmpCommon::statementHeap()) Filter(result);
((Filter *)result)->setReattemptPushDown();
result->selectionPred() = grby->selectionPred();
// NOTE: the query
//
// select a,b
// from t
// group by a,b,c
//
// where a,b,c is unique is an example for the case
// where we generate an empty filter node just because
// the group attributes are different.
}
// else /* case a) */
}
else
{
// Otherwise, create a MapValueIds node, replace the aggregate functions
// with values that contain the result of an aggregation on a single row
// (e.g. count(*) ==> 1, sum(x) ==> x, max(x) ==> x), and map the
// input value ids to the corresponding output value ids.
MapValueIds *mvi = new (CmpCommon::statementHeap()) MapValueIds(result);
// rewrite all aggregate functions and remap references to
// them to their new values
for (ValueId x = grby->aggregateExpr().init();
grby->aggregateExpr().next(x);
grby->aggregateExpr().advance(x))
{
CMPASSERT(x.getItemExpr()->isAnAggregate());
Aggregate *aggrExpr = (Aggregate *) x.getItemExpr();
// map each aggregate's value id to its single-row rewrite
mvi->addMapEntry(
aggrExpr->getValueId(),
aggrExpr->rewriteForElimination()->getValueId());
}
// Remember the original aggregate inputs, which contain information
// needed to rewrite VEGReferences into actual columns in the generator.
// NOTE: this might cause some unnecessary expressions to be carried to
// this node, but the cost for this shouldn't be too high.
// Removed 10/10/16 as part of fix for TRAFODION-2127
// These values were not used in MapValueIds::preCodeGen.
// Could consider adding this if there are issue in preCodeGen.
// ValueIdSet valuesForRewrite;
// grby->getValuesRequiredForEvaluatingAggregate(valuesForRewrite);
// mvi->addValuesForVEGRewrite(valuesForRewrite);
// If there are having predicates, put a filter below the mvi
// node. Then map the having predicates and attach them to
// the filter.
if (NOT grby->selectionPred().isEmpty())
{
// case d)
// install a filter below the mvi node
Filter *filter = new (CmpCommon::statementHeap())
Filter(mvi->child(0));
filter->setReattemptPushDown();
mvi->setChild(0,filter);
// Map the HAVING predicate using the map from the newly allocated
// MapValueIds node. This used to be done by attaching the HAVING
// predicate to the mvi node and letting pushdown do the mapping,
// then asserting that the predicate did, in fact, get pushed down.
// The problem with this is that if the predicate only has outer
// references and does not reference any columns that are output
// by the child of the mvi then the predicate will not be pushed
// down (see VegPredicate::isCovered). An example predicate is
// t1.vch7 = 'b', where t1.vch7 is an outer reference.
ValueIdSet selPredsRewritten;
ValueIdSet selPreds = grby->selectionPred();
mvi->getMap().rewriteValueIdSetDown(selPreds,selPredsRewritten);
filter->selectionPred() = selPredsRewritten;
// allocate group attributes for the filter
mvi->setGroupAttr(before->getGroupAttr());
mvi->allocateAndPrimeGroupAttributes();
mvi->pushdownCoveredExpr(
mvi->getGroupAttr()->getCharacteristicOutputs(),
mvi->getGroupAttr()->getCharacteristicInputs(),
mvi->selectionPred() );
// perform synthesis on the new filter node
mvi->child(0)->synthLogProp ();
}
// else /* case c) */
result = mvi;
}
// come up with the group attributes for the new result
result->setGroupAttr(before->getGroupAttr());
// this rule fires only once
CMPASSERT(memory == NULL);
return result;
}
// -----------------------------------------------------------------------
// The GroupBySplitRule
//
// The optimizer views a GroupByAgg to be in one of four forms:
//
// 1) A "full" groupby, i.e., it is not a partial groupby.
//
// The parser creates a group by of this form. A groupby of this
// form is seen by all the phases of processing from name binding
// through code generation.
//
// The method isNotAPartialGroupBy() returns TRUE for a "full"
// groupby and false for all other forms.
//
// 2) Partial groupby operators.
//
// Partial groupby operators can appear in the solution for a
// group by because they are created by the application of the
// transformation rule, the GroupBySplitRule. The GroupBySplitRule
// fires once for a "full" groupby and can fire once more for a
// partial groupby that is a leaf. The rule replaces
// GB(X) with GB(GB(X))
// Partial groupby operators are only seen by the optimizer, the
// pre code generator and the code generator.
//
// When the GroupBySplitRule fires on a "full" groupby it produces
// the following pairs of groupbys
//
// a) Root of the partial groupby subtree
//
// The root of the partial groupby consolidates the partial groups
// that are formed by one or more groupbys that independently
// compute partial groups.
//
// The method isAPartialGroupByRoot() returns TRUE only for such
// a partial groupby.
//
// b) 1st Leaf of the partial groupby subtree.
//
// The 1st leaf of the partial groupby subtree is simply the
// lowermost partial groupby created when the GroupBySplitRule
// fires on a "full" groupby. The term "leaf" is convenient for
// naming the partial groupby operators that are contained in the
// substitute. The reader should realize that, in reality, the
// GroupByAgg is a unary operator; the partial groupby will always
// possess a child, regardless of whether it is called a "leaf"!
//
// The method isAPartialGroupByLeaf1() returns TRUE only for such
// a partial groupby.
//
// The GroupBySplitRule can fire on the 1st leaf to produce another
// pair of partial groupby operators called the non-leaf partial
// groupby together with the 2nd leaf.
//
// c) Non-leaf partial groupby
//
// A plan can contain three partial groupbys that span two
// levels of the tree, the root, a non-leaf and a leaf.
// The non-leaf partially consolidates partial groups
// that are formed by the 2nd leaf of the partial groupby
// subtree. It adds one more operator to the pipeline but
// increases the degree of parallelism for the leaves.
//
// The method isAPartialGroupByNonLeaf() returns TRUE only for such
// a partial groupby.
//
// d) 2nd Leaf of the partial groupby subtree.
//
// The 2nd leaf of the partial groupby subtree is simply the
// lowermost partial groupby created when the GroupBySplitRule
// fires on a partial groupby that is a 1st leaf.
//
// The method isAPartialGroupByLeaf2() returns TRUE only for such
// a partial groupby.
//
// The distinction between the 1st leaf and the 2nd leaf is made purely to
// aid costing in the optimizer. The code generator processes both
// the forms of the leaves in an identical manner. The method
// isAPartialGroupByLeaf() is defined to permit the code generator
// to identify a leaf, independent of its form.
//
// A GroupByAgg operator in any one of the above forms uses the same
// implementation for grouping the data at run time. This means,
// regardless of its form, a GroupByAgg will either use the hash
// grouping or sorted grouping implementation at run time.
//
// -----------------------------------------------------------------------
// -----------------------------------------------------------------------
// GroupByTernarySplitRule
//
// The following rule is only used for performing a split of a
// 1st Leaf of a partial groupby subtree.
// -----------------------------------------------------------------------
// Destructor: the rule owns no additional state; defined out of line.
GroupByTernarySplitRule::~GroupByTernarySplitRule() {}
// topMatch() examines the Context.
// Report whether topMatch() depends on the optimization Context.
// Normally it does (it inspects the required physical properties),
// so the rule is context sensitive. When the "WaveFix" is active
// (QueryAnalysis::dontSurfTheWave()), topMatch() no longer looks at
// the context, so the rule becomes context insensitive.
NABoolean GroupByTernarySplitRule::isContextSensitive() const
{
  // WaveFix: the rule is context insensitive while the fix is on
  QueryAnalysis *queryAnalysis = QueryAnalysis::Instance();
  if (queryAnalysis && queryAnalysis->dontSurfTheWave())
    return FALSE;

  // default: topMatch() examines the Context
  return TRUE;
}
// topMatch() for the GroupByTernarySplitRule: decides whether a 1st-leaf
// partial groupby may be split once more (into a non-leaf plus a 2nd
// leaf). The rule is normally disabled ("pending debugging"); it only
// fires when the "WaveFix" is active (QueryAnalysis::dontSurfTheWave()),
// a scenario targeting scalar-aggregate queries on a multi-partition
// table, where an extra ESP layer allows parallel plan fixup.
NABoolean GroupByTernarySplitRule::topMatch(RelExpr * expr,
                                            Context * context)
{
  // WaveFix Begin
  // This is part of the fix for the count(*) wave
  // This rule is disabled by default, except if there
  // is a scalar aggregate query on a single multi-partition table, a query
  // like Select count(*) from fact;
  // In such a case we would like to use this rule to
  // get a layer of esps, this causes the plan to fixup
  // in parallel avoiding the serial fixup if the plan is
  // just the master executor on top of dp2. The serial
  // fixup causes the query to execute in wave pattern, since
  // each dp2 is fixed up and then starts execution. Due to serial
  // fixup a dp2 is fixed up, and then we move to the next
  // dp2 causing the wave pattern.
  if (!(QueryAnalysis::Instance() &&
        QueryAnalysis::Instance()->dontSurfTheWave()))
  {
    return FALSE; // $$$ pending debugging
  }
  // WaveFix End
  // Old Code Begin
  // return FALSE; // $$$ pending debugging
  // Old Code End
  if (NOT Rule::topMatch(expr,context)) // MUST be a GroupByAgg
    return FALSE;
  if (expr->getOperatorType() == REL_SHORTCUT_GROUPBY)
    return FALSE;
  // make sure we are looking at a group by node
  CMPASSERT(expr->getOperatorType() == REL_GROUPBY);
  GroupByAgg *bef = (GroupByAgg *) expr;
  // WaveFix Begin
  // NOTE: with the WaveFix active this block is never entered (the
  // guard below is always false after the early return above).
  if (!(QueryAnalysis::Instance() &&
        QueryAnalysis::Instance()->dontSurfTheWave()))
  {
  // WaveFix End
    // Note this IF is not tested because of the "return FALSE"
    // at the beginning of the method.
    // If the location is in DP2, the groupBy is in a CS, and
    // push-down is requested, do not allow split.
    if (context AND
        context->getReqdPhysicalProperty()->executeInDP2() AND
        CURRSTMT_OPTDEFAULTS->pushDownDP2Requested() AND
        bef->isinBlockStmt()
        )
      return FALSE;
  // WaveFix Begin
  }
  // WaveFix End
  if (NOT bef->isAPartialGroupByLeaf1()) // MUST be a partial groupby leaf
    return FALSE;
  // ---------------------------------------------------------------------
  // Examine the aggregate functions. If any one of them cannot be
  // evaluated in stages, such as a partial aggregation followed
  // by finalization, then do not split this GroupByAgg.
  // ---------------------------------------------------------------------
  if (NOT bef->aggregateEvaluationCanBeStaged())
    return FALSE;
  // WaveFix Begin
  // With the WaveFix active the rule matches here; the parallelism
  // checks below are skipped.
  if (QueryAnalysis::Instance() &&
      QueryAnalysis::Instance()->dontSurfTheWave())
  {
    return TRUE;
  }
  // WaveFix End
  // ---------------------------------------------------------------------
  // Perform a ternary split for a partial groupby only when the required
  // physical property permits a degree of parallelism greater than
  // one or requires more than one partitions.
  // ---------------------------------------------------------------------
  if (context AND context->getReqdPhysicalProperty())
  {
    const ReqdPhysicalProperty* const rppForMe =
      context->getReqdPhysicalProperty();
    if ( (rppForMe->getCountOfAvailableCPUs() == 1) OR
         (rppForMe->getPartitioningRequirement() AND
          (rppForMe->getPartitioningRequirement()->
           getCountOfPartitions() == 1)) OR
         rppForMe->executeInDP2() )
      return FALSE;
  }
  else
    return FALSE;
  return TRUE;
} // GroupByTernarySplitRule::topMatch()
// Can the substitute of this rule match the given output pattern?
// The substitute is a MapValueIds on top of a groupby, so a pattern
// may be satisfied either by the substitute's top node or by its child.
NABoolean GroupByTernarySplitRule::canMatchPattern(
     const RelExpr * pattern) const
{
  if (getSubstitute()->getOperator().match(pattern->getOperator()))
    return TRUE;

  // top node did not match; try the child of the substitute
  return getSubstitute()->child(0)->getOperator().match(pattern->getOperator());
}
// -----------------------------------------------------------------------
// Aggregate Distinct Elimination Rule
//
// -----------------------------------------------------------------------
// Destructor: the rule owns no additional state; defined out of line.
AggrDistinctEliminationRule::~AggrDistinctEliminationRule() {}
// topMatch() for the AggrDistinctEliminationRule: fire on a "full"
// groupby that has at least one non-redundant DISTINCT aggregate.
// A DISTINCT is redundant when the grouping columns plus the DISTINCT
// argument are already unique in the child (no duplicates can exist).
// Multiple distinct columns are supported only when every DISTINCT
// aggregate is of the simple single-argument form aggr(DISTINCT col);
// otherwise error -6001 is raised and the rule refuses to fire.
NABoolean AggrDistinctEliminationRule::topMatch(RelExpr * expr,
                                                Context * context)
{
  if (NOT Rule::topMatch(expr,context))
    return FALSE;
  if (expr->getOperatorType() == REL_SHORTCUT_GROUPBY)
    return FALSE;
  // make sure we are looking at a group by node
  CMPASSERT(expr->getOperatorType() == REL_GROUPBY);
  GroupByAgg *bef = (GroupByAgg *) expr;
  // ---------------------------------------------------------------------
  // See description above or in RelGrby.h.
  // Only a "full" groupby qualifies; partial groupbys are products of
  // the GroupBySplitRule and only get implementation rules.
  // ---------------------------------------------------------------------
  if (NOT bef->isNotAPartialGroupBy())
    return FALSE;
  ValueIdSet distinctColumns; // columns (exprs) referenced
                              // in non-redundant DISTINCT
                              // aggregates
  // count of DISTINCT aggregates that are NOT of the simple form
  // aggr(DISTINCT col): the distinct value differs from the single
  // argument, or the aggregate takes more than one argument
  CollIndex numNonMultiDistinctAggs = 0;
  const ValueIdSet &aggrs = bef->aggregateExpr();
  for (ValueId x = aggrs.init(); aggrs.next(x); aggrs.advance(x))
  {
    Aggregate *agg = (Aggregate *) x.getItemExpr();
    CMPASSERT(x.getItemExpr()->isAnAggregate());
    if (agg->isDistinct())
    {
      // redundant DISTINCT check: (group cols, distinct arg) unique?
      ValueIdSet uniqueSet = bef->groupExpr();
      uniqueSet += agg->getDistinctValueId();
      if (NOT bef->child(0).getGroupAttr()->isUnique(uniqueSet)) {
        distinctColumns += agg->getDistinctValueId();
        if((agg->getDistinctValueId() != agg->child(0)) ||
           (agg->getArity() > 1)) {
          numNonMultiDistinctAggs++;
        }
      }
    }
  }
  ULng32 dc = distinctColumns.entries();
  if (dc == 0)
  {
    // Do not fire this rule if there are no aggregate distincts.
    // Fire the GroupBySplitRule instead.
    return FALSE;
  }
  else if (dc > 1)
  {
    // This rule can't handle multiple combination of DISTINCTs, since
    // it needs to perform duplicate elimination, which can't be done
    // for different column lists at the same time. A future solution
    // will either have to make "dc" queries out of this, use the
    // transpose operator, or will be an implementation of multiple
    // distincts in the executor. The latter is the reason why we kept
    // this an optimizer rule and didn't apply it in the normalizer.
    // Indicate the real cause for the error here. The optimizer
    // will not generate an execution plan which will result in
    // an additional error after the end of the optimization phase.
    // Can handle multiple distinct values for most cases
    //
    if(numNonMultiDistinctAggs == 0) {
      return TRUE;
    }
    *CmpCommon::diags() << DgSqlCode(-6001);
    return FALSE;
  }
  return TRUE;
} // AggrDistinctEliminationRule::topMatch()
// nextSubstitute() for the AggrDistinctEliminationRule.
// Replaces GB(X) with MapValueIds(upperGB(lowerGB(X))):
// - lowerGB groups on the original grouping columns plus the DISTINCT
//   arguments, thereby performing the duplicate elimination that the
//   DISTINCT requested;
// - upperGB computes the aggregates with the DISTINCT removed;
// - the MapValueIds maps the original aggregate value ids to the
//   rewritten ones.
// When there is more than one distinct argument, a Transpose is inserted
// below lowerGB that fans each row out into one row per distinct
// argument (other arguments set to NULL), so that a single duplicate
// elimination can serve all of them.
RelExpr * AggrDistinctEliminationRule::nextSubstitute(RelExpr * before,
                                                      Context * /*context*/,
                                                      RuleSubstituteMemory * & memory)
{
  GroupByAgg *bef = (GroupByAgg *) before;
  MapValueIds *topMap;   // top of the substitute
  GroupByAgg *upperGB;   // computes the final aggregates
  GroupByAgg *lowerGB;   // performs the duplicate elimination
  const ValueIdSet &aggrs = bef->aggregateExpr();
  ValueIdSet toBeMadeDistinct;            // DISTINCT args needing dup elim
  ValueIdSet distinctAggregates;          // the real DISTINCT aggregates
  ValueIdList nonDistinctAggregates;      // may be rewritten below
  ValueIdList origNonDistinctAggregates;  // parallel list of the originals
  // Create the default substitute, two groupbys stacked on each other
  // with a MapValueIds on the very top
  topMap = (MapValueIds *) Rule::nextSubstitute(before,NULL,memory);
  upperGB = (GroupByAgg *) topMap->child(0).getPtr();
  lowerGB = (GroupByAgg *) upperGB->child(0).getPtr();
  // Clear the upper aggregate expressions, they will be rewritten.
  upperGB->aggregateExpr().clear();
  // rewrite the DISTINCT aggregate functions by eliminating the DISTINCT
  // - For each DISTINCT aggregate in the original list, add a copy
  //   of that function to the result, but without the DISTINCT.
  ValueId x = aggrs.init();
  for (; aggrs.next(x); aggrs.advance(x))
  {
    Aggregate *a = (Aggregate *) x.getItemExpr();
    NABoolean reallyDistinct = FALSE;
    if (a->isDistinct())
    {
      // a DISTINCT is redundant if (group cols, distinct arg) is unique
      ValueIdSet uniqueSet = bef->groupExpr();
      uniqueSet += a->getDistinctValueId();
      if (NOT bef->child(0).getGroupAttr()->isUnique(uniqueSet))
        reallyDistinct = TRUE;
    }
    if (reallyDistinct)
    {
      // move a copy of this aggregate to the top groupby
      // and remove the DISTINCT
      Aggregate *b = (Aggregate *)
        a->copyTopNode(0, CmpCommon::statementHeap());
      b->setDistinct(FALSE);
      b->child(0) = a->child(0);
      b->synthTypeAndValueId();
      // collect all those arguments of DISTINCT groupbys that
      // really need to be made distinct, and ignore those that
      // already are distinct
      toBeMadeDistinct += a->getDistinctValueId();
      distinctAggregates += x;
      upperGB->aggregateExpr() += b->getValueId();
      // indicate that the result that is supposed to be stored in
      // x is now delivered through the rewritten aggregate function
      topMap->addMapEntry(x,b->getValueId());
      topMap->addValueForVEGRewrite(x);
    }
    else
    {
      nonDistinctAggregates.insert(x);
      origNonDistinctAggregates.insert(x);
    }
  }
  CollIndex distValues = toBeMadeDistinct.entries();
  // Check for special case of multiple distinct aggrs
  //
  Transpose *xPose = NULL;
  ValueIdMap distinctMap;  // maps distinct args to transpose outputs
  if(distValues > 1) {
    // Insert a Transpose below the lower groupby.
    xPose = new (CmpCommon::statementHeap()) Transpose();
    xPose->child(0) = lowerGB->child(0);
    lowerGB->child(0) = xPose;
    xPose->setTransUnionVectorSize(2);
    xPose->transUnionVector() = new (CmpCommon::statementHeap())
      ValueIdList[xPose->transUnionVectorSize()];
    CollIndex distNum = 0;
    for (x = upperGB->aggregateExpr().init();
         upperGB->aggregateExpr().next(x);
         upperGB->aggregateExpr().advance(x))
    {
      Aggregate *a = (Aggregate *) x.getItemExpr();
      // For each distinct value, create a Transpose set and
      // add a mapping from the result of the transpose to the
      // original distinct value.
      //
      if(!(distinctMap.getTopValues().contains(a->child(0)))) {
        ValueIdList transVals;
        // build a typed NULL constant of the argument's (nullable) type
        ItemExpr *nullVal = new (CmpCommon::statementHeap()) ConstValue();
        NAType *nullType =
          a->child(0)->getValueId().getType().newCopy(CmpCommon::statementHeap());
        nullType->setNullable(TRUE);
        nullVal = new (CmpCommon::statementHeap())
          Cast(nullVal, nullType);
        nullVal->synthTypeAndValueId();
        // position distNum carries the real value, the others are NULL
        for(CollIndex i = 0; i < distValues; i++) {
          if(distNum == i)
            transVals.insert(a->child(0));
          else
            transVals.insert(nullVal->getValueId());
        }
        ValueIdUnion *valVidu = new(CmpCommon::statementHeap())
          ValueIdUnion(transVals, NULL_VALUE_ID);
        valVidu->synthTypeAndValueId();
        xPose->transUnionVector()[1].insert(valVidu->getValueId());
        distinctMap.addMapEntry(a->child(0), valVidu->getValueId());
        distNum++;
      }
      // make the aggregate reference the transpose output
      ValueId newVal;
      distinctMap.mapValueIdDown(a->child(0), newVal);
      a->child(0) = newVal;
      // Redrive type of aggregate since child is now nullable.
      a->synthTypeAndValueId(TRUE);
    }
    CMPASSERT(xPose && (distNum == distValues));
    toBeMadeDistinct = distinctMap.getBottomValues();
    // The non-distinct aggregates must also travel through the
    // Transpose; rewrite each of them on a copy, keeping the original
    // in origNonDistinctAggregates for the map entries below.
    for (CollIndex n = 0; n < nonDistinctAggregates.entries(); n++)
    {
      x = nonDistinctAggregates[n];
      Aggregate *a = (Aggregate *) x.getItemExpr();
      // make a copy of this aggregate
      // we will be changing it later
      Aggregate *b = (Aggregate *)
        a->copyTopNode(0, CmpCommon::statementHeap());
      b->child(0) = a->child(0);
      b->synthTypeAndValueId();
      nonDistinctAggregates[n] = b->getValueId();
      ValueIdList transVals;
      ItemExpr *nullVal = new (CmpCommon::statementHeap()) ConstValue();
      NAType *nullType =
        b->child(0)->getValueId().getType().newCopy(CmpCommon::statementHeap());
      nullType->setNullable(TRUE);
      nullVal = new (CmpCommon::statementHeap())
        Cast(nullVal, nullType);
      nullVal->synthTypeAndValueId();
      // non-distinct arguments ride along in the first transpose group
      for(CollIndex i = 0; i < distValues; i++) {
        if(i == 0)
          transVals.insert(b->child(0));
        else
          transVals.insert(nullVal->getValueId());
      }
      ValueIdUnion *valVidu = new(CmpCommon::statementHeap())
        ValueIdUnion(transVals, NULL_VALUE_ID);
      valVidu->synthTypeAndValueId();
      xPose->transUnionVector()[1].insert(valVidu->getValueId());
      // COUNT of a previously non-nullable column must not count the
      // NULL rows the Transpose introduces: count only non-NULLs
      if((a->getOperatorType() == ITM_COUNT) &&
         !(b->child(0)->getValueId().getType().supportsSQLnullLogical())) {
        b->setOperatorType(ITM_COUNT_NONULL);
      }
      b->child(0) = valVidu->getValueId();
      // Redrive type of aggregate since child is now nullable.
      //
      b->synthTypeAndValueId(TRUE);
      distNum++;
    }
    CMPASSERT(xPose &&
              (distNum == distValues + nonDistinctAggregates.entries()));
  }
  // The query contains "real" DISTINCT aggregates that require another
  // groupby operator (lowerGB).
  // - For all non-distinct aggregate functions, rewrite them into two,
  //   one for the upper node and one for the lower node.
  ValueIdList lowerValueIds;
  ValueIdList upperValueIds;
  for (CollIndex n = 0; n < nonDistinctAggregates.entries(); n++)
  {
    x = nonDistinctAggregates[n];
    Aggregate *a = (Aggregate *) x.getItemExpr();
    // Split the aggregate up in two parts, one to be evaluated
    // in the lower, and the other to be evaluated in the upper
    // groupby. Also, replace the output value id of the node
    // with the rewritten function.
    ValueId newAggResult =
      a->rewriteForStagedEvaluation
        (lowerValueIds,upperValueIds, TRUE)->getValueId();
    // map the ORIGINAL aggregate's value id to the staged result
    x = origNonDistinctAggregates[n];
    topMap->addMapEntry(x,newAggResult);
  }
  // add the lower and upper valueIds to the appropriate sets
  Int32 valusIdIdx = 0;
  for (;
       valusIdIdx < (Int32)lowerValueIds.entries(); valusIdIdx++)
    lowerGB->aggregateExpr() += lowerValueIds[valusIdIdx];
  for (valusIdIdx = 0; valusIdIdx < (Int32)upperValueIds.entries(); valusIdIdx++)
    upperGB->aggregateExpr() += upperValueIds[valusIdIdx];
  // Set aggDistElimRuleCreates flag to TRUE for both new GroupByAgg exprs.
  // When this flag is set TRUE, "MVQR GroupBy Rewrite Rule" will not be
  // fired.
  lowerGB->setAggDistElimRuleCreates(TRUE);
  upperGB->setAggDistElimRuleCreates(TRUE);
  // Set the grouping expression for the lower groupby: it has the
  // original grouping expressions plus those expressions that are
  // used in the DISTINCT aggregates. This means that we are effectively
  // doing the duplicate elimination that the DISTINCT in the original
  // aggregate function wanted.
  lowerGB->groupExpr() = upperGB->groupExpr();
  // may have to expand multi-valued item lists in the distinct columns $$$$
  lowerGB->groupExpr() += toBeMadeDistinct;
  // Map the HAVING predicate using the map from the newly allocated
  // MapValueIds node. This used to be done by attaching the HAVING
  // predicate to the mvi node and letting pushdown do the mapping,
  // then asserting that the predicate did, in fact, get pushed down.
  // The problem with this is that if the predicate only has outer
  // references and does not reference any columns that are output
  // by the child of the mvi then the predicate will not be pushed
  // down (see VegPredicate::isCovered). An example predicate is
  // t1.vch7 = 'b', where t1.vch7 is an outer reference.
  ValueIdSet selPredsRewritten;
  ValueIdSet selPreds = upperGB->selectionPred();
  topMap->getMap().rewriteValueIdSetDown(selPreds,selPredsRewritten);
  upperGB->selectionPred() = selPredsRewritten;
  // Prime the group attributes for the newly introduced nodes
  topMap->allocateAndPrimeGroupAttributes();
  topMap->pushdownCoveredExpr
    (topMap->getGroupAttr()->getCharacteristicOutputs(),
     topMap->getGroupAttr()->getCharacteristicInputs(),
     topMap->selectionPred());
  // Refine the Characteristic Inputs and Outputs of the lower GroupBy.
  // and refine the Characteristic Inputs and Outputs of the lower
  // GroupBy.
  // Pushdown HAVING predicates from upperGB to lowerGB, if possible.
  // Refine the Characteristic Inputs and Outputs of lowerGB.
  upperGB->pushdownCoveredExpr
    (upperGB->getGroupAttr()->getCharacteristicOutputs(),
     upperGB->getGroupAttr()->getCharacteristicInputs(),
     upperGB->selectionPred());
  // pushdownCoveredExpr() is not called on the lower GroupBy because
  // its child should be a CutOp; it is not legal to pushdown a
  // predicate and modify the Group Attributes of the Cascades GROUP
  // that it belongs to.
  if(xPose) {
    // ... except when a Transpose was inserted above: then lowerGB's
    // child is the Transpose, not a CutOp, so pushdown is performed.
    lowerGB->pushdownCoveredExpr
      (lowerGB->getGroupAttr()->getCharacteristicOutputs(),
       lowerGB->getGroupAttr()->getCharacteristicInputs(),
       lowerGB->selectionPred());
  }
  // synthesize logical properties for the new upper groupby
  topMap->child(0)->synthLogProp();
  return topMap;
}
// Can the substitute of this rule match the given output pattern?
// The substitute is a MapValueIds on top of a groupby; a pattern may
// be satisfied by either of those two operators.
NABoolean AggrDistinctEliminationRule::canMatchPattern (
     const RelExpr * pattern) const
{
  NABoolean matched =
    getSubstitute()->getOperator().match(pattern->getOperator());
  if (NOT matched)
    // top node did not match; try the substitute's child
    matched =
      getSubstitute()->child(0)->getOperator().match(pattern->getOperator());
  return matched;
}
// -----------------------------------------------------------------------
// GroupBySplitRule
// -----------------------------------------------------------------------
// Destructor: the rule owns no additional state; defined out of line.
GroupBySplitRule::~GroupBySplitRule() {}
// topMatch() for the GroupBySplitRule: fire only on a "full" groupby
// (see RelGrby.h) whose aggregate functions can all be evaluated in
// stages (partial aggregation plus finalization), that contains no
// non-redundant DISTINCT aggregate (those are handled by the
// AggrDistinctEliminationRule), and that could not simply be
// eliminated because its grouping columns are unique in the child.
NABoolean GroupBySplitRule::topMatch(RelExpr * expr,
                                     Context * context)
{
  // check if this rule has been disabled via RuleGuidanceCQD
  // the CQD is COMP_INT_77 and it represents a bitmap
  // below we check if the bit # 5 is ON
  if (CURRSTMT_OPTDEFAULTS->isRuleDisabled(5))
    return FALSE;

  if (NOT Rule::topMatch(expr,context))
    return FALSE;

  if (expr->getOperatorType() == REL_SHORTCUT_GROUPBY)
    return FALSE;

  // this rule only ever fires on a groupby node
  CMPASSERT(expr->getOperatorType() == REL_GROUPBY);
  GroupByAgg *groupByNode = (GroupByAgg *) expr;

  // only a "full" groupby qualifies; partial groupbys are themselves
  // products of this rule and must not be split again here
  if (NOT groupByNode->isNotAPartialGroupBy())
    return FALSE;

  // every aggregate must support staged evaluation
  if (NOT groupByNode->aggregateEvaluationCanBeStaged())
    return FALSE;

  // collect the columns (exprs) referenced in non-redundant DISTINCT
  // aggregates; a DISTINCT is redundant when the grouping columns plus
  // the DISTINCT argument are already unique in the child
  ValueIdSet distinctArgs;
  const ValueIdSet &aggregates = groupByNode->aggregateExpr();
  for (ValueId aggId = aggregates.init();
       aggregates.next(aggId);
       aggregates.advance(aggId))
  {
    Aggregate *aggFunc = (Aggregate *) aggId.getItemExpr();
    CMPASSERT(aggId.getItemExpr()->isAnAggregate());
    if (aggFunc->isDistinct())
    {
      ValueIdSet uniqueCols = groupByNode->groupExpr();
      uniqueCols += aggFunc->getDistinctValueId();
      if (NOT groupByNode->child(0).getGroupAttr()->isUnique(uniqueCols))
        distinctArgs += aggFunc->getDistinctValueId();
    }
  }

  // if there is any real aggregate distinct, let the
  // AggrDistinctEliminationRule fire instead of this one
  if (distinctArgs.entries() > 0)
    return FALSE;

  // do not split the groupby if it could be eliminated outright
  if (NOT groupByNode->groupExpr().isEmpty())
    return NOT (groupByNode->child(0).getGroupAttr()->
                  isUnique(groupByNode->groupExpr()));

  return TRUE;
} // GroupBySplitRule::topMatch()
// nextSubstitute() for the GroupBySplitRule.
// Replaces GB(X) with MapValueIds(upperGB(lowerGB(X))): the lower
// groupby computes partial groups/aggregates and the upper groupby
// consolidates them; the MapValueIds maps the original aggregate
// value ids to the staged rewrites. The assert below shows the rule
// may also fire on a 1st-leaf partial groupby (ternary split), in
// which case the pair becomes a non-leaf plus a 2nd leaf.
RelExpr * GroupBySplitRule::nextSubstitute(RelExpr * before,
                                           Context * /*context*/,
                                           RuleSubstituteMemory * & memory)
{
  GroupByAgg *bef = (GroupByAgg *) before;
  MapValueIds *topMap;   // top of the substitute
  GroupByAgg *upperGB;   // consolidates the partial groups
  GroupByAgg *lowerGB;   // forms the partial groups
  const ValueIdSet &aggrs = bef->aggregateExpr();
  ValueIdSet toBeMadeDistinct;
  ValueIdSet nonDistinctAggregates;
  // Create the default substitute, two groupbys stacked on each other
  // with a MapValueIds on the very top
  topMap = (MapValueIds *) Rule::nextSubstitute(before,NULL,memory);
  upperGB = (GroupByAgg *) topMap->child(0).getPtr();
  lowerGB = (GroupByAgg *) upperGB->child(0).getPtr();
  // Mark the two new GroupBy operators in the substitute as
  // split GroupBy operators (see comments in RelGrby.h). This
  // is done in order to prevent further splitting by the application
  // of the GroupBySplitRule.
  assert(bef->isNotAPartialGroupBy() OR bef->isAPartialGroupByLeaf1());
  if (bef->isNotAPartialGroupBy())
  {
    // splitting a full groupby: produce root + 1st leaf
    upperGB->markAsPartialGroupByRoot();
    lowerGB->markAsPartialGroupByLeaf1();
  }
  else
  {
    // splitting a 1st leaf: produce non-leaf + 2nd leaf
    upperGB->markAsPartialGroupByNonLeaf();
    lowerGB->markAsPartialGroupByLeaf2();
  }
  // The grouping and aggregate expressions have already been
  // assigned to upperGB. This is because the pattern
  // for this rule contains a WildCardOp. The WildCardOp provides the
  // GroupByAgg pointed to by bef to copyTopNode as the instance
  // to be copied.
  // Clear the upper aggregate expressions, they will be rewritten.
  upperGB->aggregateExpr().clear();
  // rewrite the DISTINCT aggregate functions by eliminating the DISTINCT
  // - For each DISTINCT aggregate in the original list, add a copy
  //   of that function to the result, but without the DISTINCT.
  ValueId x = aggrs.init();
  for (; aggrs.next(x); aggrs.advance(x))
  {
    Aggregate *a = (Aggregate *) x.getItemExpr();
    NABoolean reallyDistinct = FALSE;
    if (a->isDistinct())
    {
      // a DISTINCT is redundant if (group cols, distinct arg) is unique
      ValueIdSet uniqueSet = bef->groupExpr();
      uniqueSet += a->getDistinctValueId();
      if (NOT bef->child(0).getGroupAttr()->isUnique(uniqueSet))
        reallyDistinct = TRUE;
    }
    if (reallyDistinct)
    {
      // move a copy of this aggregate to the top groupby
      // and remove the DISTINCT
      Aggregate *b = (Aggregate *)
        a->copyTopNode(0, CmpCommon::statementHeap());
      b->setDistinct(FALSE);
      b->child(0) = a->child(0);
      b->synthTypeAndValueId();
      // collect all those arguments of DISTINCT groupbys that
      // really need to be made distinct, and ignore those that
      // already are distinct
      toBeMadeDistinct += a->getDistinctValueId();
      upperGB->aggregateExpr() += b->getValueId();
      // indicate that the result that is supposed to be stored in
      // x is now delivered through the rewritten aggregate function
      topMap->addMapEntry(x,b->getValueId());
    }
    else
    {
      nonDistinctAggregates += x;
    }
  }
  // The query contains "real" DISTINCT aggregates that require another
  // groupby operator (lowerGB).
  // - For all non-distinct aggregate functions, rewrite them into two,
  //   one for the upper node and one for the lower node.
  ValueIdList lowerValueIds;
  ValueIdList upperValueIds;
  for (x = nonDistinctAggregates.init();
       nonDistinctAggregates.next(x);
       nonDistinctAggregates.advance(x))
  {
    Aggregate *a = (Aggregate *) x.getItemExpr();
    // Split the aggregate up in two parts, one to be evaluated
    // in the lower, and the other to be evaluated in the upper
    // groupby. Also, replace the output value id of the node
    // with the rewritten function.
    ValueId newAggResult =
      a->rewriteForStagedEvaluation
        (lowerValueIds,upperValueIds, TRUE)->getValueId();
    topMap->addMapEntry(x,newAggResult);
  }
  // add the lower and upper valueIds to the appropriate sets
  Int32 valusIdIdx = 0;
  for (;
       valusIdIdx < (Int32)lowerValueIds.entries(); valusIdIdx++)
    lowerGB->aggregateExpr() += lowerValueIds[valusIdIdx];
  for (valusIdIdx = 0; valusIdIdx < (Int32)upperValueIds.entries(); valusIdIdx++)
    upperGB->aggregateExpr() += upperValueIds[valusIdIdx];
  // Set the grouping expression for the lower groupby: it has the
  // original grouping expressions plus those expressions that are
  // used in the DISTINCT aggregates. This means that we are effectively
  // doing the duplicate elimination that the DISTINCT in the original
  // aggregate function wanted.
  lowerGB->groupExpr() = upperGB->groupExpr();
  // may have to expand multi-valued item lists in the distinct columns $$$$
  lowerGB->groupExpr() += toBeMadeDistinct;
  // Map the HAVING predicate using the map from the newly allocated
  // MapValueIds node. This used to be done by attaching the HAVING
  // predicate to the mvi node and letting pushdown do the mapping,
  // then asserting that the predicate did, in fact, get pushed down.
  // The problem with this is that if the predicate only has outer
  // references and does not reference any columns that are output
  // by the child of the mvi then the predicate will not be pushed
  // down (see VegPredicate::isCovered). An example predicate is
  // t1.vch7 = 'b', where t1.vch7 is an outer reference.
  ValueIdSet selPredsRewritten;
  ValueIdSet selPreds = upperGB->selectionPred();
  topMap->getMap().rewriteValueIdSetDown(selPreds,selPredsRewritten);
  upperGB->selectionPred() = selPredsRewritten;
  // Prime the group attributes for the newly introduced nodes
  topMap->allocateAndPrimeGroupAttributes();
  topMap->pushdownCoveredExpr
    (topMap->getGroupAttr()->getCharacteristicOutputs(),
     topMap->getGroupAttr()->getCharacteristicInputs(),
     topMap->selectionPred());
  // Refine the Characteristic Inputs and Outputs of the lower GroupBy.
  // and refine the Characteristic Inputs and Outputs of the lower
  // GroupBy.
  // Pushdown HAVING predicates from upperGB to lowerGB, if possible.
  // Refine the Characteristic Inputs and Outputs of lowerGB.
  upperGB->pushdownCoveredExpr
    (upperGB->getGroupAttr()->getCharacteristicOutputs(),
     upperGB->getGroupAttr()->getCharacteristicInputs(),
     upperGB->selectionPred());
  // pushdownCoveredExpr() is not called on the lower GroupBy because
  // its child should be a CutOp; it is not legal to pushdown a
  // predicate and modify the Group Attributes of the Cascades GROUP
  // that it belongs to.
  topMap->child(0)->synthLogProp();
  return topMap;
}
// Return the promise (scheduling priority) of this transformation rule;
// the GroupBySplitRule uses the default promise for transformation rules.
// All three arguments are ignored.
Int32 GroupBySplitRule::promiseForOptimization(RelExpr *,
Guidance *,
Context *)
{
return DefaultTransRulePromise;
}
// Can the substitute of this rule match the given output pattern?
// The substitute is a MapValueIds on top of a groupby, so the pattern
// may be matched by either the top node or its child.
NABoolean GroupBySplitRule::canMatchPattern (const RelExpr * pattern) const
{
  if (getSubstitute()->child(0)->getOperator().match(pattern->getOperator()))
    return TRUE;

  // the child did not match; try the top of the substitute
  return getSubstitute()->getOperator().match(pattern->getOperator());
}
// -----------------------------------------------------------------------
// methods for class GroupByOnJoinRule
// -----------------------------------------------------------------------
// Destructor: the rule owns no additional state; defined out of line.
GroupByOnJoinRule::~GroupByOnJoinRule() {}
// topMatch() for the GroupByOnJoinRule (push groupby below a join):
// fire only on a "full" groupby with a non-empty grouping expression,
// no rollup, and grouping columns that are not already unique in the
// child (i.e. the groupby could not simply be eliminated).
// The optimization context is not examined.
NABoolean GroupByOnJoinRule::topMatch (RelExpr * expr,
                                       Context * /*context*/)
{
  // rule guidance: CQD COMP_INT_77 is a bitmap of disabled rules;
  // bit # 3 turns this rule off
  if (CURRSTMT_OPTDEFAULTS->isRuleDisabled(3))
    return FALSE;

  if (NOT Rule::topMatch(expr,NULL))
    return FALSE;

  if (expr->getOperatorType() == REL_SHORTCUT_GROUPBY)
    return FALSE;

  // this rule only ever fires on a groupby node
  CMPASSERT(expr->getOperatorType() == REL_GROUPBY);
  GroupByAgg *groupByNode = (GroupByAgg *) expr;

  // Partial groupbys created by the GroupBySplitRule must not be
  // transformed any further; they are only subjected to
  // implementation rules (the split rule replaces GB(CutOp) with
  // GB(GB(CutOp)) and the substitute should stay as produced).
  if (NOT groupByNode->isNotAPartialGroupBy())
    return FALSE;

  // A scalar aggregate (empty grouping expression) does not provide
  // the functional dependencies this transformation relies on, so
  // reject it (barring pathological cases that don't matter for a
  // commercial DBMS).
  if (groupByNode->groupExpr().isEmpty())
    return FALSE;

  // do not transform a groupby that performs a rollup
  if (groupByNode->isRollup())
    return FALSE;

  // do not fire if the groupby could be eliminated because its
  // grouping columns are already unique in the child
  return NOT (groupByNode->child(0).getGroupAttr()->
                isUnique(groupByNode->groupExpr()));
}
RelExpr * GroupByOnJoinRule::nextSubstitute(RelExpr * before,
                                            Context * /*context*/,
                                            RuleSubstituteMemory * & /*memory*/)
{
  // ---------------------------------------------------------------------
  // The idea is to transform a query with a groupby on top of a join
  // into a query with the groupby below the join:
  //
  //   t1.grcol,t2.grcol                          pred(t1.jcol,t2.jcol)
  //   grby t1.aggr,t2.aggr                               join
  //    |                                                /    \
  //    | pred(t1.jcol,t2.jcol)        t1.grcol,t1.jcol /      \
  //   join                    ---->   t1.aggr      grby        t2
  //   /  \                                          |
  //  /    \                                         |
  // t1     t2                                       t1
  //
  // There are three conditions for firing the rule:
  //
  // (1) The grouping expressions can be separated into a part
  //     t1.grcol that is covered by t1 and a part t2.grcol that
  //     is covered by t2.
  //
  // (2) The aggregate functions only reference values from t1.
  //
  // (3) In the result of the join, the groupby columns
  //     (t1.grcol,t2.grcol) contain or cover the join predicate
  //     values from t1, and a candidate-key of t2.
  //     Expressed as functional dependency that looks like
  //
  //     (t1.grcol,t2.grcol) ----> (t1.jcol,t2.key).
  //
  // Further info can be found in a paper published by Weipeng
  // Paul Yan and Per-Ake Larson in the proceedings of the 10th
  // IEEE Data Engineering Conference, Houston, TX, 1994, pp. 89-100.
  // http://www.informatik.uni-trier.de/~ley/db/conf/icde/YanL94.html
  //
  // Besides the substitute shown above, the following substitutes are
  // also generated in some cases:
  //
  //      join
  //      /  \       if the aggregate functions only reference
  //     /    \      values from t2 and if the join commutativity
  //   t1     grby   cannot be applied on the join between t1 and t2
  //           |
  //           |     (needed if we restrict the commutativity rule)
  //           t2
  //
  // Returns the new join node on success, or NULL when one of the
  // conditions above cannot be proven.
  // ---------------------------------------------------------------------
  CMPASSERT(before->getOperatorType() == REL_GROUPBY);
  GroupByAgg *oldGB = (GroupByAgg *) before;
  Join *oldJoin = (Join *) before->child(0).getPtr();

  // Check to see whether we should apply any more transformation rules on
  // this join child.
  if (oldJoin->isTransformComplete())
    return NULL;

  const ValueIdSet &aggrs = oldGB->aggregateExpr();
  ValueIdSet grcol(oldGB->groupExpr());
  // TRUE when the aggregates reference t2 only, i.e. the groupby is
  // pushed onto the right child instead of the left one
  NABoolean reverseT1T2 = FALSE;

  // scratch sets, to be used for coverTest
  ValueIdSet referencedInputs;
  ValueIdSet coveredExpr;
  ValueIdSet coveredSubExpr;

  // ---------------------------------------------------------------------
  // check condition (1), can grouping cols be separated?
  // ---------------------------------------------------------------------
  ValueIdSet t1grcol;
  ValueIdSet t2grcol;
  referencedInputs.clear();
  if (NOT grcol.isEmpty())
  {
    // which grouping columns are covered by t1?
    oldJoin->child(0).getGroupAttr()->coverTest(
         grcol,
         oldGB->getGroupAttr()->getCharacteristicInputs(),
         t1grcol,
         referencedInputs);
    // if not all of them are covered by t1, check which
    // ones are covered by t2
    if (t1grcol != grcol)
    {
      referencedInputs.clear();
      oldJoin->child(1).getGroupAttr()->coverTest(
           grcol,
           oldGB->getGroupAttr()->getCharacteristicInputs(),
           t2grcol,
           referencedInputs);
      // all remaining groupby columns have to be covered by t2!!
      // note that we ignored any covered subexpressions, we are
      // only interested in groupby expressions that are covered
      // as a whole
      ValueIdSet rest(grcol);
      rest -= t1grcol;
      rest -= t2grcol;
      if (NOT rest.isEmpty())
        return NULL;
    }
  }

  // ---------------------------------------------------------------------
  // check condition (2), do the aggregates only reference one table?
  // ---------------------------------------------------------------------
  ValueIdSet aggrInputs;
  oldGB->getValuesRequiredForEvaluatingAggregate(aggrInputs);
  // are the children covered by the left table, t1?
  coveredExpr.clear();
  referencedInputs.clear();
  oldJoin->child(0).getGroupAttr()->coverTest(
       aggrInputs,
       oldGB->getGroupAttr()->getCharacteristicInputs(),
       coveredExpr,
       referencedInputs);
  if (coveredExpr != aggrInputs)
  {
    // no, children aren't covered by t1, try the right child, t2
    coveredExpr.clear();
    referencedInputs.clear();
    oldJoin->child(1).getGroupAttr()->coverTest(
         aggrInputs,
         oldGB->getGroupAttr()->getCharacteristicInputs(),
         coveredExpr,
         referencedInputs);
    if (coveredExpr == aggrInputs)
      // All aggregates are covered by table t2, try it the reverse way.
      reverseT1T2 = TRUE;
    else
      // sorry, aggregates reference both input tables
      return NULL;
  }

  if (reverseT1T2)
  {
    // don't fire the rule with reversed roles if we can apply the
    // join commutativity rule on the join and the CQD is OFF
    if (oldJoin->getGroupAttr()->getNumJoinedTables() <= 2 &&
        CmpCommon::getDefault(GROUP_BY_PUSH_TO_BOTH_SIDES_OF_JOIN) != DF_ON)
      return NULL;
    // don't reverse the roles if the join isn't symmetric (such as a
    // left join, semi-join, or TSJ)
    if (oldJoin->getOperatorType() != REL_JOIN)
      return NULL;
  }

  // ---------------------------------------------------------------------
  // compute t1.jcol, t2.jcol
  // ---------------------------------------------------------------------
  // The variable joinPreds should contain both selection predicate
  // and the join predicate. -Genesis Case 10-981103-1385
  ValueIdSet joinPreds(oldJoin->selectionPred());
  joinPreds += oldJoin->joinPred();
  ValueIdSet t1jcol;
  ValueIdSet t2jcol;
  if (NOT (oldJoin->isTSJ()))
  {
    // Special handling of VEGPredicates:a VEGPredicate in the join predicate
    // means that a VEGReference should be covered by both input tables.
    // Convert all VEGPredicates into VEGReferences.
    // NOTE(review): joinPreds is modified while being iterated; this
    // add/remove-during-iteration pattern is presumably supported by
    // ValueIdSet's init/next/advance protocol.
    for (ValueId x = joinPreds.init(); joinPreds.next(x); joinPreds.advance(x))
    {
      if (x.getItemExpr()->getOperatorType() == ITM_VEG_PREDICATE)
      {
        VEGPredicate *v = (VEGPredicate *) x.getItemExpr();
        joinPreds -= x;
        joinPreds += v->getVEG()->getVEGReference()->getValueId();
      }
    }
    if (NOT joinPreds.isEmpty())
    {
      // Which grouping columns are covered by t1 and t2?
      referencedInputs.clear();
      coveredSubExpr.clear();
      oldJoin->child(0).getGroupAttr()->coverTest(
           joinPreds,
           oldGB->getGroupAttr()->getCharacteristicInputs(),
           t1jcol,
           referencedInputs,
           &coveredSubExpr);
      t1jcol += coveredSubExpr;
      referencedInputs.clear();
      coveredSubExpr.clear();
      oldJoin->child(1).getGroupAttr()->coverTest(
           joinPreds,
           oldGB->getGroupAttr()->getCharacteristicInputs(),
           t2jcol,
           referencedInputs,
           &coveredSubExpr);
      t2jcol += coveredSubExpr;
    }
  }
  else
  {
    // For TSJs, calculate the t1jcol by subtracting the char. inputs of
    // the join from the char. inputs of t2. The ones remaining are guaranteed
    // to be those input values coming from t1.
    // No need to compute t2jcol since we can't reverseT1T2 for a TSJ.
    t1jcol = oldJoin->child(1).getGroupAttr()->getCharacteristicInputs();
    t1jcol -= oldJoin->getGroupAttr()->getCharacteristicInputs();
  }

  // ---------------------------------------------------------------------
  // check condition (3), the functional dependencies
  // ---------------------------------------------------------------------
  // Is t1jcol covered by the grouping columns?
  // Initialize a group attributes object with the grouping columns as its
  // characteristic outputs. If t1jcol or t2jcol is covered by those empty
  // group attributes, it can be computed from grcol.
  GroupAttributes gra;
  const ValueIdSet *tjcol;
  if (reverseT1T2)
    tjcol = &t2jcol;
  else
    tjcol = &t1jcol;
  // add all columns that are functionally dependent on grcol,
  // this will increase the chance that we cover the join columns
  const ValueIdSet &constr = oldGB->getGroupAttr()->getConstraints();
  for (ValueId v = constr.init(); constr.next(v); constr.advance(v))
  {
    if (v.getItemExpr()->getOperatorType() == ITM_FUNC_DEPEND_CONSTRAINT)
    {
      FuncDependencyConstraint *fdc =
        (FuncDependencyConstraint *) v.getItemExpr();
      if (grcol.contains(fdc->getDeterminingCols()))
        grcol += fdc->getDependentCols();
    }
  }
  gra.addCharacteristicOutputs(grcol);
  coveredExpr.clear();
  referencedInputs.clear();
  gra.coverTest(*tjcol,
                oldGB->getGroupAttr()->getCharacteristicInputs(),
                coveredExpr,
                referencedInputs);
  if (coveredExpr != *tjcol)
    return NULL;

  // get to the constraints for t2 (check whether we reversed the
  // roles of t1 and t2)
  const GroupAttributes *cga;
  if (reverseT1T2)
    cga = oldJoin->child(0).getGroupAttr();
  else
    cga = oldJoin->child(1).getGroupAttr();

  // is t2.key functionally dependent on the grouping columns?
  if (NOT cga->isUnique(grcol))
  {
    // this simple test for uniqueness didn't work, try walking
    // through functional dependency constraints of the join
    // to see whether we can show a functional dependency
    // grcol --> t2.key.
    // NOTE: We also try to put the functional dependency together
    // from multiple constraints, e.g. (a,b,c) --> (d,e,f) could be
    // derived from the following two FD constraints:
    // (a,b) --> d
    // (a,b,c) --> (e,f)
    const ValueIdSet &cc =
      oldJoin->getGroupAttr()->getConstraints();
    ValueIdSet grcolPlusDependents(grcol);
    for (ValueId ccv = cc.init(); cc.next(ccv); cc.advance(ccv))
    {
      if (ccv.getItemExpr()->getOperatorType() ==
          ITM_FUNC_DEPEND_CONSTRAINT)
      {
        FuncDependencyConstraint *ccfd =
          (FuncDependencyConstraint *) ccv.getItemExpr();
        if (grcolPlusDependents.contains(ccfd->getDeterminingCols()))
        {
          grcolPlusDependents += ccfd->getDependentCols();
        }
      }
    }
    // part of fix to allow GroupBySplitRule and GroupByOnJoinRule
    // to do eager aggregation on BP wellevent queries
    //   SELECT ..., AVG(p.PI_VALUE) AS AverageofPIValues, ...
    //   FROM Equipment e INNER JOIN PI_VALUE p
    //     ON e.EXP_EP = p.EXP_EP
    //    AND e.EXP_DWGOM = p.EXP_DWGOM
    //    AND e.Equipment_Location = p.PI_TAG
    //   WHERE ...
    //   GROUP BY p.PI_TAG, e.API, e.EXP_EP, e.EXP_DWGOM;
    // The immediate cause for mxcmp's inability to push groupby below
    // the join is cga->isUnique(grcolPlusDependents) returns FALSE
    // even though "grcol --> t2.key" is true for the BP query.
    // The root cause of the problem is the prevention of VEG formation
    // for the "e.Equipment_Location = p.PI_TAG" varchar join predicate.
    // We really want VEGs to form even for varchar equality predicates,
    // but, until that happens, we compensate for the missing VEG.
    // That is, in order to establish that
    //   (p.pi_tag, e.api, e.exp_ep, e.exp_dwgom) -->
    //   (e.exp_ep, e.exp_dwgom, e.equipment_location)
    // we introduce the missing VEG for the equijoin predicate
    //   "p.pi_tag = e.equipment_location"
    if (CmpCommon::getDefault(COMP_BOOL_177) == DF_OFF)
      joinPreds.introduceMissingVEGRefs(grcolPlusDependents);
    // To test grcol --> t2.key we want the left side of the
    // functional dependency to be grcol and all of its dependent
    // values, and we want some key or candidate key (a unique set
    // of columns) on the right. The actual test is done by checking
    // uniqueness of grcolPlusDependents.
    if (NOT cga->isUnique(grcolPlusDependents))
    {
      // sorry, the rule is not applicable since we cannot prove that
      // t2.key is functionally dependent on the grouping columns
      return NULL;
    }
  }

  // ---------------------------------------------------------------------
  // conditions (1), (2), and (3) are met, now create the substitute
  // ---------------------------------------------------------------------
  Join *newJoin = (Join *)
    oldJoin->copyTopNode(0, CmpCommon::statementHeap());
  GroupByAgg *newGB = (GroupByAgg *)
    oldGB->copyTopNode(0, CmpCommon::statementHeap());
  newJoin->setGroupAttr(before->getGroupAttr());
  newGB->setGroupAttr(new (CmpCommon::statementHeap()) GroupAttributes());

  // Set groupByOnJoinRuleCreates flag to TRUE for new group by expr.
  // When this flag is set TRUE, "MVQR GroupBy Rewrite Rule" will not be
  // fired.
  newGB->setGroupByOnJoinRuleCreates(TRUE);

  // -- pruning based on potential -- begin
  // combinedPotential determines if a task on a expression is pruned
  // (see CURRSTMT_OPTDEFAULTS->pruneByOptLevel)
  Int32 oldJoinCombinedPotential =
    oldJoin->getPotential() + oldJoin->getGroupAttr()->getPotential();
  Int32 originalGroupPotential = before->getGroupAttr()->getPotential();
  // newJoin should have the same combined potential as the oldJoin
  Int32 newJoinPotential = oldJoinCombinedPotential - originalGroupPotential;
  newJoin->setPotential(newJoinPotential);
  // newGB should have the same combined potential as the new join, this is
  // because if tasks on one of them (i.e. newJoin, newGB) are pruned then
  // tasks on both of them should be pruned, otherwise the work done is useless
  // since the tasks will not contribute toward getting a full plan
  newGB->setPotential(newJoinPotential);
  newGB->getGroupAttr()->updatePotential(originalGroupPotential);
  // -- pruning based on potential -- end

  // move any HAVING predicates from the GB node to the join node
  newJoin->selectionPred() += newGB->getSelectionPred();
  newGB->selectionPred().clear();

  // put the substitute tree together; the new grouping columns are the
  // child's own grouping columns plus its (non-constant) join columns
  ValueIdSet constantValues ;
  if (reverseT1T2)
  {
    // reversed roles, produce join(cut(0),grby(cut(1)))
    newJoin->child(0) = oldJoin->child(0).getPtr();
    newJoin->child(1) = newGB;
    newGB->child(0) = oldJoin->child(1).getPtr();
    newGB->aggregateExpr() = aggrs;
    newGB->groupExpr() = t2grcol;
    t2jcol.getConstants(constantValues);
    t2jcol.remove(constantValues);
    newGB->groupExpr() += t2jcol;
  }
  else
  {
    // produce join(grby(cut(0)),cut(1))
    newJoin->child(0) = newGB;
    newJoin->child(1) = oldJoin->child(1).getPtr();
    newGB->child(0) = oldJoin->child(0).getPtr();
    newGB->aggregateExpr() = aggrs;
    newGB->groupExpr() = t1grcol;
    t1jcol.getConstants(constantValues);
    t1jcol.remove(constantValues);
    newGB->groupExpr() += t1jcol;
  }

  // ---------------------------------------------------------------------
  // Don't create a groupby node with no grouping columns. The definition
  // of such a node that this means computing an aggregate and always
  // returning a row doesn't work for this query transformation. We need
  // one row back if there is data in the table, otherwise we need 0 rows.
  // Add the predicate "count(*) > 0" and the aggregate "count(*)" to the
  // empty groupby node.
  // $$$$ re-introduce the REL_AGGREGATE id and change the generator to
  // return 0 rows for a groupby with an empty input table. Then delete
  // the code below.
  // ---------------------------------------------------------------------
  if (newGB->groupExpr().isEmpty())
  {
    ValueId valId;
    for (valId = newGB->aggregateExpr().init();
         newGB->aggregateExpr().next(valId);
         newGB->aggregateExpr().advance(valId))
    {
      // this aggregate has now become a scalar aggregate.
      // Re-type it since a scalar aggr has different attributes
      // than a non-scalar aggregate (scalar aggr are nullable).
      if ( (valId.getItemExpr()->getOperatorType() == ITM_MIN) ||
           (valId.getItemExpr()->getOperatorType() == ITM_MAX) ||
           (valId.getItemExpr()->getOperatorType() == ITM_SUM) )
      {
        // Need to set the flag inScalarGroupBy as this
        // aggregate may be re-synthesized; or a derived aggregate
        // may need to have this information. (case10-001227-0392)
        // This needs to be done irrespective of whether we
        // re-synthesize the type now.
        Aggregate * aggr = (Aggregate *)valId.getItemExpr();
        aggr->setInScalarGroupBy();
        if (NOT valId.getType().supportsSQLnull())
        {
          aggr->synthTypeAndValueId(TRUE /*resynthesize*/);
        }
      }
    }
    // add "count(1)" and the selection predicate "count(1) > 0"
    Aggregate *dummyAgg = new (CmpCommon::statementHeap())
      Aggregate(ITM_COUNT,
                new (CmpCommon::statementHeap()) SystemLiteral(1));
    BiRelat *pred = new (CmpCommon::statementHeap())
      BiRelat(ITM_GREATER,
              dummyAgg,
              new (CmpCommon::statementHeap()) SystemLiteral(0));
    pred->synthTypeAndValueId();
    newGB->aggregateExpr() += dummyAgg->getValueId();
    newGB->selectionPred() += pred->getValueId();
    // Add the ValueIds of the two constants generated here to
    // the Characteristic Inputs of newGB.
    newGB->getGroupAttr()->addCharacteristicInputs
      (dummyAgg->child(0)->getValueId());
    newGB->getGroupAttr()->addCharacteristicInputs
      (pred->child(1)->getValueId());
  }

  // ---------------------------------------------------------------------
  // set up the group attributes and logical properties of the substitute
  // ---------------------------------------------------------------------
  newJoin->allocateAndPrimeGroupAttributes();
  // Refine the Characteristic Inputs and Outputs of the join
  newJoin->pushdownCoveredExpr
    (newJoin->getGroupAttr()->getCharacteristicOutputs(),
     newJoin->getGroupAttr()->getCharacteristicInputs(),
     newJoin->selectionPred());
  // pushdownCoveredExpr() is not called on the GroupBy because
  // its child should be a CutOp; it is not legal to pushdown a
  // predicate and modify the Group Attributes of the Cascades GROUP
  // that it belongs to.
  // synthesize logical properties for the new GB node
  newGB->synthLogProp();
  newGB->getGroupAttr()->addToAvailableBtreeIndexes(
       newGB->child(0).getGroupAttr()->getAvailableBtreeIndexes());
  // Call synthLogProp on the result also so that we can call
  // findEquiJoinPredicates() with the new set of predicates
  newJoin->synthLogProp();
  // If oldJoin resulted from an application of MJPrimeTableRule, directly
  // or indirectly, we set newJoin to be also from MJPrimeTableRule.
  if (oldJoin->isJoinFromPTRule())
    newJoin->setJoinFromPTRule();
  return newJoin;
}
// -----------------------------------------------------------------------
// Methods for class PartialGroupByOnTSJRule
// -----------------------------------------------------------------------
// Destructor: no resources of its own, base Rule cleanup suffices.
PartialGroupByOnTSJRule::~PartialGroupByOnTSJRule() {}
NABoolean PartialGroupByOnTSJRule::topMatch (RelExpr * expr,
                                             Context * /*context*/)
{
  // Rule guidance: CQD COMP_INT_77 is a bitmap of disabled rules;
  // bit # 4 disables this rule.
  if (CURRSTMT_OPTDEFAULTS->isRuleDisabled(4))
    return FALSE;

  // The expression must match the rule's general pattern.
  if (NOT Rule::topMatch(expr,NULL))
    return FALSE;

  // A shortcut groupby is never transformed by this rule.
  if (expr->getOperatorType() == REL_SHORTCUT_GROUPBY)
    return FALSE;

  // At this point the node must be a regular group by.
  CMPASSERT(expr->getOperatorType() == REL_GROUPBY);
  GroupByAgg *gbNode = (GroupByAgg *) expr;

  // Fire only for a leaf partial groupby (created by the
  // GroupBySplitRule), and never for a pure aggregate (no
  // grouping columns).
  return (gbNode->isAPartialGroupByLeaf() AND
          NOT gbNode->groupExpr().isEmpty());
}
// -----------------------------------------------------------------------
// PartialGroupByOnTSJRule
//
// This rule performs the following transformation:
//
// final GB final GB
// | |
// partial GB TSJ
// | / \
// TSJ Cut#0 partial GB
// / \ |
// Cut#0 Cut#1 Cut#1
//
// This transformation can be performed if the TSJ does not have the
// potential for increasing/reducing the data returned from the right
// child. Thus, LEFT nested joins as well as semi-nested joins are
// disqualified from this transformation. There is no restriction
// on the columns referenced in the partial GB, since the TSJ operator
// has the potential for "flowing" input values from the left child to
// the right child. Thus, partial GB operator can group on values
// produced from either Cut#0 or Cut#1 above.
//
// -----------------------------------------------------------------------
// ---------------------------------------------------------------------
// Push a leaf partial groupby below a TSJ (see the rule description
// above): GB(TSJ(t1,t2)) ==> TSJ(t1, GB(t2)).
// Returns the new TSJ node, or NULL if the transformation does not
// apply or is judged unprofitable by the heuristics below.
// ---------------------------------------------------------------------
RelExpr * PartialGroupByOnTSJRule::nextSubstitute(RelExpr * before,
                                                  Context * /*context*/,
                                                  RuleSubstituteMemory * & /*memory*/)
{
  CMPASSERT(before->getOperatorType() == REL_GROUPBY);
  GroupByAgg *oldGB = (GroupByAgg *) before;
  Join *oldJoin = (Join *) before->child(0).getPtr();

  // Perform this transformation only for inner (non-semi) nested joins.
  // LEFT nested joins and semi-nested joins could add/remove rows from
  // the right child and would make the pushed-down aggregates wrong.
  if (NOT oldJoin->isInnerNonSemiJoin()) return NULL;

  GroupAttributes* TSJChild1GA =
    oldJoin->child(1).getGroupAttr();

  // Do not push the group by to the right leg of the TSJ if the
  // grouping columns are unique on the right leg of the TSJ
  // (grouping there would not reduce anything).
  if (TSJChild1GA->isUnique(oldGB->groupExpr()))
    return NULL;

  // Do not push the group by to the right leg of the TSJ if the
  // right leg is not a scan.
  if (TSJChild1GA->getAvailableBtreeIndexes().isEmpty())
    return NULL;

  // HEURISTIC:
  // Do not push the group by down if the TSJ output number of rows
  // is less than or equal to 100 times the TSJ left child output number
  // of rows. This is because we only want to push the group by down
  // if each probe produces at least 100 rows, so that each partial
  // grouping operation will be on at least 100 rows. This is not
  // perfect - it doesn't take into account failed probes (i.e. probes
  // that do not find a match). But it should be good enough.
  if (oldJoin->getGroupAttr()->getResultCardinalityForEmptyInput() <=
      (oldJoin->child(0).getGroupAttr()->getResultCardinalityForEmptyInput()
       * 100.0))
    return NULL;

  // If the tsj performs any reduction of data, then do not perform
  // this transformation. Pushing the partial groupby below the tsj
  // may result in incorrect aggregate results. Check for any executor
  // preds that reside on the TSJ node that have not been pushed down.
  // Please note that VEG predicates (even though pushed) are still
  // retained on parent nodes.
  const ValueIdSet & selPreds = oldJoin->getSelectionPred();
  for (ValueId x = selPreds.init(); selPreds.next(x); selPreds.advance(x))
  {
    // any non-VEG predicate is a real executor predicate --> bail out
    if (x.getItemExpr()->getOperatorType() != ITM_VEG_PREDICATE)
      return NULL;
  }

  // If the groupby is not likely to fit in DP2 memory and does not result in
  // significant data reduction, there is no reason to push it past the join.
  CostScalar afterGBCard =
    oldGB->getGroupAttr()->getResultCardinalityForEmptyInput();
  CostScalar beforeGBCard =
    oldJoin->getGroupAttr()->getResultCardinalityForEmptyInput();
  CostScalar reqGBReduction = CURRSTMT_OPTDEFAULTS->getReductionToPushGBPastTSJ();
  if (reqGBReduction > 0.0 AND afterGBCard > beforeGBCard * reqGBReduction)
    return NULL;

  // Build the substitute: copies of the join and the groupby.
  Join *newJoin = (Join *)oldJoin->copyTopNode(0, CmpCommon::statementHeap());
  GroupByAgg *newGB = (GroupByAgg *)oldGB->copyTopNode(0, CmpCommon::statementHeap());
  newJoin->setGroupAttr(before->getGroupAttr());
  newGB->setGroupAttr(new (CmpCommon::statementHeap()) GroupAttributes());

  // Remember that this partial GB has been pushed below a TSJ by this rule.
  newGB->gbAggPushedBelowTSJ() = TRUE;

  // produce join(cut(0), grby(cut(1)))
  newJoin->child(0) = oldJoin->child(0).getPtr();
  newJoin->child(1) = newGB;
  newGB->child(0) = oldJoin->child(1).getPtr();

  // ---------------------------------------------------------------------
  // set up the group attributes and logical properties of the substitute
  // ---------------------------------------------------------------------
  newJoin->allocateAndPrimeGroupAttributes();
  // The pushed down partial groupby may require as potential inputs those
  // outputs produced by the left child of the TSJ.
  newGB->getGroupAttr()->addCharacteristicInputs
    (((RelExpr *)newJoin->child(0))->getGroupAttr()->getCharacteristicOutputs());
  // Refine the Characteristic Inputs and Outputs of the join
  newJoin->pushdownCoveredExpr
    (newJoin->getGroupAttr()->getCharacteristicOutputs(),
     newJoin->getGroupAttr()->getCharacteristicInputs(),
     newJoin->selectionPred());
  // pushdownCoveredExpr() is not called on the GroupBy because
  // its child should be a CutOp; it is not legal to pushdown a
  // predicate and modify the Group Attributes of the Cascades GROUP
  // that it belongs to.
  // synthesize logical properties for the new GB node
  newGB->synthLogProp();
  newGB->getGroupAttr()->addToAvailableBtreeIndexes(
       newGB->child(0).getGroupAttr()->getAvailableBtreeIndexes());
  return newJoin;
} // PartialGroupByOnTSJRule::nextSubstitute()
// -----------------------------------------------------------------------
// methods for class ShortCutGroupByRule
// -----------------------------------------------------------------------
// Destructor: no resources of its own, base Rule cleanup suffices.
ShortCutGroupByRule::~ShortCutGroupByRule() {}
// ---------------------------------------------------------------------
// Match a scalar groupby with a single MIN, MAX, or ANY_TRUE aggregate
// that can be evaluated with the shortcut (first-row / min-max)
// optimization.
// ---------------------------------------------------------------------
NABoolean ShortCutGroupByRule::topMatch (RelExpr *expr,
                                         Context * /*context*/)
{
  if (NOT Rule::topMatch(expr,NULL))
    return FALSE;
  if (expr->getOperatorType() == REL_SHORTCUT_GROUPBY)
    return FALSE;
  // make sure we are looking at a group by node
  CMPASSERT(expr->getOperatorType() == REL_GROUPBY);
  // QSTUFF
  CMPASSERT(NOT(expr->getGroupAttr()->isStream() OR
                expr->getGroupAttr()->isEmbeddedUpdateOrDelete()));
  // QSTUFF
  // we are only interested in a scalar (no grouping columns)
  // aggregate here
  GroupByAgg *grbyagg = (GroupByAgg *) expr;
  const ValueIdSet &aggrs = grbyagg->aggregateExpr();
  if (! grbyagg->groupExpr().isEmpty()) return FALSE;
  if (aggrs.isEmpty()) return FALSE;
  // look at the first (and, as verified below, only) aggregate
  ValueId aggr_valueid = aggrs.init();
  aggrs.next(aggr_valueid);
  ItemExpr *item_expr = aggr_valueid.getItemExpr();
  OperatorTypeEnum op = item_expr->getOperatorType();
  if ( op!=ITM_MIN AND op!= ITM_MAX AND op!= ITM_ANY_TRUE) return FALSE;
  // do not do min-max optimization if it is not a full group by or if
  // default switches it off
  if ( (op!=ITM_ANY_TRUE) AND (NOT grbyagg->isNotAPartialGroupBy()
      OR NOT CURRSTMT_OPTDEFAULTS->isMinMaxConsidered()))
    return FALSE;
  if (aggrs.entries()>1) return FALSE;
  // we are only interested in anytrue with inequality operator
  if (op==ITM_ANY_TRUE)
  {
    ItemExpr* anytrue_expr = item_expr->child(0);
    // NOTE: renamed from 'op' -- the original declaration shadowed the
    // aggregate's operator type above.
    OperatorTypeEnum childOp = anytrue_expr->getOperatorType();
    // ---------------------------------------------------------------------
    // Perform this transformation if and only if the operand of the
    // anyTrue is a predicate. When the GroupBySplitRule fires, it
    // rewrites the anyTrue for staged evaluation and evaluates the
    // anyTrue(p) first, followed by anyTrue(anyTrue(p)). The second
    // aggregate should not be evaluated using the ShortCutGroupBy.
    // ---------------------------------------------------------------------
    if ((NOT anytrue_expr->isAPredicate()) OR
        childOp == ITM_EQUAL OR childOp == ITM_NOT_EQUAL)
      return FALSE;
  }
  else
  {
    // MIN/MAX: the argument (after order simplification) must be a
    // plain column or VEG reference.
    ItemExpr* simplifiedIE = item_expr->child(0)->simplifyOrderExpr();
    // renamed from 'op' to avoid shadowing, as above
    OperatorTypeEnum colOp = simplifiedIE->getOperatorType();
    if (colOp != ITM_VEG_REFERENCE AND
        colOp != ITM_BASECOLUMN AND colOp != ITM_INDEXCOLUMN) return FALSE;
  }
  return TRUE;
} // ShortCutGroupByRule::topMatch()
// ---------------------------------------------------------------------
// Replace the matched scalar groupby with a ShortCutGroupBy node
// (optionally wrapped by a Filter and/or a MapValueIds node, depending
// on the aggregate: MIN/MAX vs. anyTrue(<comparison>)).
// ---------------------------------------------------------------------
RelExpr * ShortCutGroupByRule::nextSubstitute(RelExpr * before,
                                              Context * /*context*/,
                                              RuleSubstituteMemory *& /*memory*/)
{
  ShortCutGroupBy *result;
  GroupByAgg *bef = (GroupByAgg *) before;

  // create a shortcut groupby node
  result = new(CmpCommon::statementHeap()) ShortCutGroupBy(bef->child(0));
  // Copy over the groupbyagg private fields, then
  // call the relexpr copytopnode to copy over all common fields.
  (void) bef->copyTopNode(result, CmpCommon::statementHeap());

  // init new private members
  // the validation of anytrue_expr is done in topMatch
  const ValueIdSet &aggrs = result->aggregateExpr();
  ValueId aggrid = aggrs.init();
  aggrs.next(aggrid);
  ItemExpr * aggr = aggrid.getItemExpr();
  OperatorTypeEnum main_op = aggr->getOperatorType();

  if(main_op == ITM_MIN OR main_op == ITM_MAX)
  {
    // now set the group attributes of the result's top node
    result->setGroupAttr(bef->getGroupAttr());
    ItemExpr * min_max_expr = aggr->child(0);
    result->set_lhs(NULL);
    result->set_rhs(min_max_expr);
    result->setIsNullable(min_max_expr->getValueId().getType().supportsSQLnullLogical());
    // MIN/MAX over ordered data only needs the first row
    result->setFirstNRows(1);
    if(main_op == ITM_MIN )
    {
      result->setOptForMin(TRUE);
      result->setOptForMax(FALSE);
    }
    else
    {
      result->setOptForMin(FALSE);
      result->setOptForMax(TRUE);
      if(result->isNullable()){
        // For MAX on a nullable column, add a Filter child with the
        // predicate "arg IS NOT NULL" so that NULLs (which sort high)
        // do not win.
        Filter * filter = new(CmpCommon::statementHeap()) Filter(result->child(0));
        filter->setReattemptPushDown();
        result->child(0) = filter;
        ItemExpr * notNullExpr = new(CmpCommon::statementHeap())
          UnLogic (ITM_IS_NOT_NULL,min_max_expr);
        notNullExpr->synthTypeAndValueId();
        // set the selection predicate for the filter
        filter->selectionPred() += notNullExpr->getValueId();
        result->allocateAndPrimeGroupAttributes();
        ValueIdSet expr =
          result->getGroupAttr()->getCharacteristicOutputs();
        ValueIdSet input = result->getGroupAttr()->getCharacteristicInputs();
        result->pushdownCoveredExpr(expr, input,
                                    result->selectionPred());
        filter->synthLogProp();
      }
    }
  }
  else
  {
    // the single aggregate is anyTrue(lhs <comparison> rhs)
    ItemExpr *anytrue_expr = aggr->child(0);
    ItemExpr *lhs = anytrue_expr->child(0);
    ItemExpr *rhs = anytrue_expr->child(1);
    OperatorTypeEnum op = anytrue_expr->getOperatorType();
    result->set_lhs(lhs);
    result->set_rhs(rhs);
    result->setIsNullable(lhs->getValueId().getType().supportsSQLnullLogical() OR
                          rhs->getValueId().getType().supportsSQLnullLogical());
    if (op == ITM_GREATER OR op == ITM_GREATER_EQ)
    {
      // now set the group attributes of the result's top node
      result->setGroupAttr(bef->getGroupAttr());
      result->setOptForMin(TRUE);
      result->setOptForMax(FALSE);
      if (result->isNullable() AND result->canApplyMdam())
      {
        // add a filter node with the selection predicate:
        // (anytrue_expr is TRUE) OR (anytrue_expr is UNKNOWN)
        // => (anytrue_expr is NOT FALSE)
        // introduce a filter node
        // note that without MDAM-like access method,
        // the introduction of the input node in the selection pred.
        // of the filter could result in materialization for each input
        Filter *filter = new(CmpCommon::statementHeap())
          Filter(result->child(0));
        filter->setReattemptPushDown();
        result->child(0) = filter;
        // synthesize the selection predicate from anytrue_expr
        ItemExpr *rootptr = new(CmpCommon::statementHeap())
          UnLogic (ITM_IS_FALSE,
                   anytrue_expr);
        rootptr = new(CmpCommon::statementHeap()) UnLogic(ITM_NOT, rootptr);
        rootptr->synthTypeAndValueId();
        // set the selection predicate for the filter
        filter->selectionPred() += rootptr->getValueId();
        result->allocateAndPrimeGroupAttributes();
        ValueIdSet relExpr =
          result->getGroupAttr()->getCharacteristicOutputs();
        ValueIdSet input = result->getGroupAttr()->getCharacteristicInputs();
        result->pushdownCoveredExpr(relExpr, input,
                                    result->selectionPred() );
      }
    }
    else if (op == ITM_LESS OR op == ITM_LESS_EQ)
    {
      // do not set result group attributes to that of before; instead we
      // set the MapValueIds group attributes to those of RelExpr before
      result->setOptForMin(FALSE);
      result->setOptForMax(TRUE);
      // get a copy of anytrue_aggr
      // and replace the operator by ITM_ANY_TRUE_MAX
      ItemExpr *new_anytrue_aggr =
        new(CmpCommon::statementHeap()) Aggregate(ITM_ANY_TRUE_MAX,
                                                  aggr->child(0));
      new_anytrue_aggr->synthTypeAndValueId();
      result->aggregateExpr().clear();
      result->aggregateExpr() += new_anytrue_aggr->getValueId();
      // mapping the new ValueId into the old one so that
      // all the references of the old aggregate expression map
      // to the new ValueId
      // also MapAndRewrite the selection pred containing the
      // old (upper) ValueId
      MapValueIds *mvi = new(CmpCommon::statementHeap()) MapValueIds(result);
      mvi->addMapEntry(aggr->getValueId(),              // upper ValueID
                       new_anytrue_aggr->getValueId()); // lower(new) ValueID
      result->selectionPred().clear();
      // map the selection predicates down over the map
      for (ValueId x = bef->selectionPred().init();
           bef->selectionPred().next(x);
           bef->selectionPred().advance(x))
      {
        result->selectionPred() +=
          x.getItemExpr()->mapAndRewrite(mvi->getMap(),TRUE);
      }
      // Genesis case: 10-010315-1747. Synthesizing MapValueId outputs correctly
      // MapValueIds should produce uppervalues as output, if required.
      // This also fixes genesis case 10-010320-1817.
      mvi->setGroupAttr(bef->getGroupAttr());
      // set group attributes for children
      result->getGroupAttr()->addCharacteristicInputs(
           bef->getGroupAttr()->getCharacteristicInputs());
      ValueIdSet resultOutputs;
      // Map the outputs of the groupby to be in terms of the new values.
      mvi->getMap().
        rewriteValueIdSetDown(bef->getGroupAttr()->getCharacteristicOutputs(),
                              resultOutputs);
      result->getGroupAttr()->addCharacteristicOutputs(resultOutputs);
      // Removed 10/10/16 as part of fix for TRAFODION-2127
      // These values were not used in MapValueIds::preCodeGen.
      // Could consider adding this if there are issues in preCodeGen.
      //   bef->getValuesRequiredForEvaluatingAggregate(valuesForRewrite);
      //   mvi->addValuesForVEGRewrite(valuesForRewrite);
      // perform synthesis on the new child node
      mvi->child(0)->synthLogProp();
      // In this case the root of the substitute is the MapValueIds node.
      // Return it directly: the original code forced it into 'result'
      // with a cast to ShortCutGroupBy* even though the object is a
      // MapValueIds (a cast between unrelated class pointer types that
      // only worked because the pointer was used solely as a RelExpr*).
      return mvi;
    }
    else
    {
      result->setGroupAttr(bef->getGroupAttr());
    }
  }
  return result;
} // ShortCutGroupByRule::nextSubstitute()
NABoolean ShortCutGroupByRule::canMatchPattern(
     const RelExpr * /*pattern*/) const
{
  // This rule may emit several different substitute shapes, so we
  // conservatively report that any pattern could be matched.
  return TRUE;
}
// -----------------------------------------------------------------------
// methods for class CommonSubExprRule
// -----------------------------------------------------------------------
// Destructor: no resources of its own, base Rule cleanup suffices.
CommonSubExprRule::~CommonSubExprRule() {}
RelExpr * CommonSubExprRule::nextSubstitute(RelExpr * before,
                                            Context * /*context*/,
                                            RuleSubstituteMemory *& /*memory*/)
{
  // The common-subexpression node adds nothing of its own; the
  // substitute is simply its only child.
  return before->child(0).getPtr();
}
NABoolean CommonSubExprRule::canMatchPattern(
     const RelExpr * /*pattern*/) const
{
  // Eliminating a common-subexpression node can potentially help with
  // nearly any pattern, so always report a possible match.
  return TRUE;
}
// -----------------------------------------------------------------------
// methods for class SampleScanRule
// -----------------------------------------------------------------------
// The promiseForOptimization for the SampleScanRule (CLUSTER
// sampling) has been raised to force the
// SampleScanRule::nextSubstitute (but not necessarily the topMatch
// method) to be applied before the PhysicalSampleRule. This allows
// us to disable the PhysicalSampleRule if the SampleScanRule
// succeeds. If the SampleScanRule fails, then the PhysicalSampleRule
// is applied. In this case, if the sampling type is CLUSTER, the
// generator will detect that a plan for CLUSTER sampling could not be
// found.
//
// Return the highest promise so that this rule's substitutes are
// generated ahead of PhysicalSampleRule (see the comment above for
// why that ordering matters for CLUSTER sampling).
Int32 SampleScanRule::promiseForOptimization(RelExpr * ,
                                             Guidance * ,
                                             Context * )
{
  return AlwaysBetterPromise;
}
// Decide whether a sample-scan substitute should be attempted for
// this expression.
NABoolean SampleScanRule::topMatch(RelExpr * expr,
                                   Context * context)
{
  // Bail out unless the generic rule pattern matches.
  if (!Rule::topMatch(expr, context))
    return FALSE;

  const RelSample *sample = (RelSample *)expr;

  // Only simple random-relative sampling (i.e. no full balance
  // expression) is handled by this rule.
  if (!sample->isSimpleRandomRelative())
    return FALSE;

  // By default, fire only for cluster sampling; DP2 row sampling can
  // be enabled through the ALLOW_DP2_ROW_SAMPLING CQD.
  if ((sample->getClusterSize() == -1) &&
      (CmpCommon::getDefault(ALLOW_DP2_ROW_SAMPLING) != DF_ON))
    return FALSE;

  // NOTE: an earlier check that required execution in DP2 is disabled.
  return TRUE;
}
// Produce a sample scan: a copy of the child Scan node that absorbs
// the sampling attributes and predicates of the RelSample above it.
// Returns NULL when the transformation does not apply.
RelExpr * SampleScanRule::nextSubstitute(RelExpr * before,
                                         Context * context,
                                         RuleSubstituteMemory * &memory)
{
  CMPASSERT(before->getOperatorType() == REL_SAMPLE);
  RelSample *sampleNode = (RelSample *)before;
  Scan      *scanNode   = (Scan *)(before->child(0).getPtr());
  CMPASSERT(sampleNode->isSimpleRandomRelative() == TRUE);

  // A scan on a vertically partitioned table is acceptable only when
  // it covers a single partition.
  if (scanNode->isVerticalPartitionTableScan() == TRUE &&
      scanNode->isSingleVerticalPartitionScan() == FALSE)
    return NULL;

  // Give up (for now) when the scan carries selection predicates.
  // Ideally sampling and selection would be combined; revisit later.
  if (scanNode->selectionPred().entries() > 0)
    return NULL;

  // Copy the scan and move the sample's attributes down into it.
  Scan *sampleScan = (Scan *)scanNode->copyTopNode(0, CmpCommon::statementHeap());
  sampleScan->setGroupAttr(before->getGroupAttr());
  sampleScan->selectionPred() += sampleNode->selectionPred();
  sampleScan->samplePercent(sampleNode->getSamplePercent());
  sampleScan->sampledColumns() += sampleNode->sampledColumns();
  sampleScan->clusterSize(sampleNode->getClusterSize());

  // Initialize index info from scratch instead of copying it from the
  // original scan: addIndexInfo() records only the base-table access
  // path, since a sample scan does not use indexes.
  sampleScan->addIndexInfo();

  // Record that the SampleScanRule succeeded so PhysicalSampleRule can
  // be suppressed. Even if this rule succeeds, the optimizer could
  // otherwise still pick a PhysicalSampleRule plan, but CLUSTER
  // sampling can only be implemented by a sample scan.
  sampleNode->setSampleScanSucceeded(TRUE);

  return sampleScan;
}
//++MV,
//++MV,
// Match only a join whose right child is an insert into an MV log
// (DrivingMvLogInsert), that also satisfies the generic rule pattern
// and is not marked NOT_ATOMIC.
NABoolean JoinToBushyTreeRule::topMatch (RelExpr * expr,
                                         Context * context)
{
  // Cheap, rule-specific check first.
  if (NOT expr->getInliningInfo().isDrivingMvLogInsert())
    return FALSE;

  if (NOT Rule::topMatch(expr, context))
    return FALSE;

  return (expr->getTolerateNonFatalError() != RelExpr::NOT_ATOMIC_);
}
// This rule implements a transformation that produces a bushy tree from left-linear tree of inner joins.
// This rule fires only for join that has insert to MV log as a right child (DrivingMvLogInsert)
//
// Pattern before the transformation, left linear tree
//
// Before - inner join
// / bef_j_map \
// / updateTableDesc_j \
// Inner Join #1 subtree 3
// / bef_j0_map \
// / updateTableDesc_j0 \
// subtree 1 subtree 2
//
// Bushy tree produced by this rule
//
// Result - inner join
// / bef_j0_map \
// subtree 1 Inner Join #1
// / bef_j_map \
// / updateTableDesc_j \
// subtree 2 subtree 3
//
// Rewrite a left-linear tree of inner joins into the bushy tree shown
// in the diagram above. Fires only when the top join drives an insert
// into an MV log (see topMatch).
RelExpr * JoinToBushyTreeRule::nextSubstitute(RelExpr * before,
                                              Context * context,
                                              RuleSubstituteMemory * & memory)
{
  // Do the default pattern substitution first; this yields the bushy
  // "result" tree of the diagram above.
  Join *result = (Join *)Rule::nextSubstitute(before,NULL,memory);
  Join *join1 = (Join *)result->child(1).getPtr();       // inner join #1, now the right child
  Join *bef_join = (Join *)before;                       // top join of the before tree
  Join *bef_join0 = (Join *)bef_join->child(0).getPtr(); // inner join #1 of the before tree

  // Copy updateSelectValueIdMaps and updateTableDesc from the
  // before tree to the after tree. Please see the location, the source
  // and the target of each copy in the transformation illustration
  // above. Note updateTableDesc_j0 is not copied because the right
  // child of the top join in the after tree is not an IUD node.
  //
  ValueIdMap* bef_j_map = bef_join->updateSelectValueIdMap();
  if ( bef_j_map) {
    join1 -> setUpdateSelectValueIdMap(
         new (CmpCommon::statementHeap()) ValueIdMap(*bef_j_map)
                                      );
  }
  join1 -> setUpdateTableDesc(bef_join->updateTableDesc());

  ValueIdMap* bef_j0_map = bef_join0->updateSelectValueIdMap();
  if ( bef_j0_map ) {
    result -> setUpdateSelectValueIdMap(
         new (CmpCommon::statementHeap()) ValueIdMap(*bef_j0_map)
                                       );
  }

  // Mark the join tree rooted at result to be NOT pushed down
  result->setAllowPushDown(FALSE);
  // Mark the join tree rooted at join1 to be pushed down
  join1->setAllowPushDown(TRUE);

  // don't try to apply this rule again on the result
  result->contextInsensRules() += getNumber();

  // enable left shift rule and join commutativity rule on the result
  result->getInliningInfo().resetFlags(II_DrivingMvLogInsert);

  // pull up predicates from the new inner join into the top join
  result->selectionPred() += join1->selectionPred();
  join1->selectionPred().clear();

  // allocate new Group Attributes for the new inner join
  join1->setGroupAttr(new (CmpCommon::statementHeap()) GroupAttributes);

  // the new join's characteristic inputs are the union of the
  // characteristic inputs of its two children
  ValueIdSet joinInput = ((RelExpr *)join1->getChild(0))->getGroupAttr()->getCharacteristicInputs();
  joinInput += ((RelExpr *)join1->getChild(1))->getGroupAttr()->getCharacteristicInputs();
  join1->getGroupAttr()->setCharacteristicInputs(joinInput);

  // Compute the set of values that each child will potentially require
  // for evaluating expressions. Also compute the set of values that
  // the child is capable of producing as output.
  // This call will NOT affect the characteristic input and output
  // values of CutOps.
  result->allocateAndPrimeGroupAttributes();

  // Push down as many full or partial expressions that can be
  // computed by the children. Recompute the Group Attributes of
  // each child that is not a CutOp.
  result->pushdownCoveredExpr
    (result->getGroupAttr()->getCharacteristicOutputs(),
     result->getGroupAttr()->getCharacteristicInputs(),
     result->selectionPred());

  // synthesize estimated logical properties bottom-up: the new inner
  // join first, then the top join
  join1->synthLogProp();
  result->synthLogProp();

  return result;
}
//--MV
// -----------------------------------------------------------------------
// methods for class OnceGuidance
// -----------------------------------------------------------------------
// Destructor: the rule subset cleans itself up.
OnceGuidance::~OnceGuidance()
{
}
// Build a guidance that permits every currently applicable rule
// except the one given by 'exceptRule' (typically the rule that just
// fired, so it is not immediately re-applied).
OnceGuidance::OnceGuidance(NAUnsigned exceptRule, CollHeap* h) :
     allButOne_(h)
{
  allButOne_ = *(GlobalRuleSet->applicableRules());
  allButOne_ -= exceptRule;
}
// Return the precomputed "all rules but one" subset.
const RuleSubset * OnceGuidance::applicableRules()
{ return &allButOne_; }
// -----------------------------------------------------------------------
// methods for class StopGuidance
// -----------------------------------------------------------------------
// Construct a guidance with an empty rule subset: no further rules
// may be applied.
StopGuidance::StopGuidance(CollHeap* h) :
     emptySet_(h)
{}
// Destructor: nothing special to do.
StopGuidance::~StopGuidance()
{
}
// Return the empty rule subset, forbidding all rule applications.
const RuleSubset * StopGuidance::applicableRules()
{ return &emptySet_; }
// -----------------------------------------------------------------------
// methods for class FilterGuidance
// -----------------------------------------------------------------------
// Destructor: the rule subset cleans itself up.
FilterGuidance::~FilterGuidance()
{
}
// -----------------------------------------------------------------------
// FilterGuidance:
//
// Added to fix CR # 10-001006-2815.
//
// Only allow the filter rules to be applied - i.e.
// FilterRule0, FilterRule1, and FilterRule2. Used when exploring
// the child of a filter so that double filter nodes - i.e. a
// filter that is a child of a filter - are processed correctly.
//
// This is necessary because FilterRule1, which is the rule that
// would process a filter over a filter (since a filter is a unary
// non-leaf operator), will refuse to do anything if its child is
// a filter. It assumes that the filter child will be merged with
// its child when exploring. But the filter rule used to issue
// "StopGuidance", above, so this would never happen during exploring.
// It also never happens during optimization, because there is no
// implementation rule for a filter, and rules are only issued
// for the immediate child of an operator during optimization if the
// parent was actually implemented. Prior to adding this guidance
// class, once one filter ended up on top of another filter,
// the "double filter" could never be eliminated. One solution
// would be to issue no guidance at all, so all rules could fire.
// This was not done because it was assumed that the reason
// "StopGuidance" was issued was because applying all the rules to
// the child of a filter was causing a performance problem,
// since all the rules would have to be applied on the child
// again after the filter was merged with the child. By
// issuing a guidance that only allows the filter rules to fire,
// additional work will only be performed if we are indeed processing
// a "double filter". These were never being processed before, so
// it is not possible that this will result in any new unnecessary work.
//
// NOTE added 11/14/00 : This solution did not work. Apparently, the
// reason the filter rule issued "Stop Guidance" was not strictly for
// performance reasons, but to cover up some horrible problem with
// cascades. After implementing this fix, certain queries with
// aggregates, group by, or having clauses caused what looked like
// an infinite loop in cascades during exploring. I can't figure out
// why, and so this fix is backed out. Instead, FilterRule1 is
// modified to eliminate one of the filters if it encounters a filter
// over a filter. FilterRule::guidanceForExploringChild() now
// issues StopGuidance once again instead of FilterGuidance.
// -----------------------------------------------------------------------
// Construct the subset of rules allowed when exploring the child of a
// filter: the MultiJoin enumeration rules (when applicable) plus the
// three filter rules. See the history note above — this guidance is
// currently not issued by FilterRule::guidanceForExploringChild().
FilterGuidance::FilterGuidance(CollHeap* h) :
     filterRules_(h)
{
  //Solution 10-030930-0076
  //A filter cannot be pushed down onto a MultiJoin.
  //Therefore make sure MJ enumeration rules fire
  //before filter push down onto a MJ.
  //The MultiJoin enumeration rules will create joins
  //onto which we can push down a filter.
  filterRules_ += MJEnumRuleNumber;
  filterRules_ += MJExpandRuleNumber;
  filterRules_ += MJStarJoinIRuleNumber;
  filterRules_ += MJStarJoinIIRuleNumber;
  // Keep only those MJ rules that are currently applicable.
  filterRules_.intersectSet(*(GlobalRuleSet->applicableRules()));
  // Add the filter rules themselves (FilterRule0, FilterRule1,
  // FilterRule2). Note: added AFTER the intersection above, so they
  // are always allowed regardless of the global applicable set.
  filterRules_ += FilterRule0Number;
  filterRules_ += FilterRule1Number;
  filterRules_ += FilterRule2Number;
}
// Return the filter + MJ-enumeration rule subset built by the ctor.
const RuleSubset * FilterGuidance::applicableRules()
{ return &filterRules_; }
// -----------------------------------------------------------------------
// Methods for class TSJUDRRule
// TSJUDRRule implements UDR join transformations
// -----------------------------------------------------------------------
// Match a logical CallSP node that needs the UDR-join transformation.
NABoolean TSJUDRRule::topMatch (RelExpr * expr,
                                Context * context)
{
  if (NOT Rule::topMatch(expr, context))
    return FALSE;

  // Only CALL statements are handled by this rule.
  if (expr->getOperatorType() != REL_CALLSP)
    return FALSE;

  CallSP *callSp = (CallSP *)expr;

  // The transformation is needed only when the CallSP has a child
  // node, which it gets when a subquery or a UDF appears among its
  // input parameters.
  if ((FALSE == callSp->hasSubquery()) &&
      (FALSE == callSp->hasUDF()))
    return FALSE;

  // An already-physical node needs no further transformation.
  if (expr->isPhysical())
    return FALSE;

  return TRUE;
}
// Raj P - 10/2000
// Used by the CALL statement for stored procedures for Java.
// Transforms the logical CallSP node into a TSJ whose right child is
// a physical copy of the CallSP. The CallSP's child subtree (present
// because a subquery or UDF appears among its input parameters, see
// topMatch) becomes the left child of the TSJ and feeds its outputs
// to the CallSP as inputs.
RelExpr * TSJUDRRule::nextSubstitute(RelExpr * before,
                                     Context *context,
                                     RuleSubstituteMemory * & memory)
{
  Join *result = NULL;
  switch (before->getOperatorType())
  {
    case REL_CALLSP:
    {
      // Make a physical copy of the CallSP; it becomes the right
      // child of the new TSJ.
      CallSP * oldCallSP = (CallSP *) before;
      RelExpr *newCallSP = oldCallSP->copyTopNode(0,CmpCommon::statementHeap());
      newCallSP->setOperatorType (REL_CALLSP);
      ((CallSP *) newCallSP)->setPhysical ();

      // Create the new TSJ expression
      result = new (CmpCommon::statementHeap())
        Join(before->child(0),
             newCallSP,
             REL_TSJ,
             NULL,
             FALSE,
             FALSE,
             CmpCommon::statementHeap(),
             NULL,
             NULL);

      // Now set the group attributes of the result's top node.
      result->setGroupAttr(before->getGroupAttr());

      // Recompute the input values that each child requires as well as
      // the output values that each child is capable of producing.
      result->allocateAndPrimeGroupAttributes();

      // Output values produced by the left child of the TSJ become
      // the inputs for the right child (the CallSP).
      // (A formerly-declared but unused 'leftChild' local was removed.)
      RelExpr * rightChild = result->child(1);
      rightChild->getGroupAttr()->addCharacteristicInputs
        (((RelExpr *)before->child(0))->getGroupAttr()->getCharacteristicOutputs());

      // Push down as many full or partial expressions as can be
      // computed by the children.
      result->pushdownCoveredExpr(result->getGroupAttr()->getCharacteristicOutputs(),
                                  result->getGroupAttr()->getCharacteristicInputs(),
                                  result->selectionPred());

      // Synthesize logical properties for the new physical CallSP node.
      ((CallSP *)rightChild)->synthLogProp();

      // Preserve the block-statement attribute of the original node.
      result->setBlockStmt( before->isinBlockStmt() );
      break;
    }
    default:
      CMPASSERT(0);
      break;
  } // switch
  return result;
} // TSJUDRRule::nextSubstitute()
// This rule produces the same substitute in every context.
NABoolean TSJUDRRule::isContextSensitive () const
{ return FALSE; }
// methods for class HbaseAccessRule
// -----------------------------------------------------------------------
//HbaseScanRule::~HbaseScanRule() {}
//
//NABoolean HbaseScanRule::topMatch(RelExpr * relExpr, Context *context)
//{
// if (NOT Rule::topMatch(relExpr,context))
// return FALSE;
//
// Scan * scan= (Scan *) relExpr;
// if (scan->getTableDesc()->getNATable()->isHbaseTable() == FALSE)
// return FALSE;
//
// // Check for required physical properties that require an enforcer
// // operator to succeed.
// if (relExpr->rppRequiresEnforcer(context->getReqdPhysicalProperty()))
// return FALSE;
//
// return TRUE;
//
//}
//
//
//RelExpr * HbaseScanRule::nextSubstitute(RelExpr * before,
// Context * /*context*/,
// RuleSubstituteMemory *& /*mem*/)
//{
// CMPASSERT(before->getOperatorType() == REL_SCAN);
// Scan* bef = (Scan*) before;
//
// // Simply copy the contents of the HbaseAccess from the before pattern.
// HbaseAccess *result = new(CmpCommon::statementHeap())
// HbaseAccess(bef->getTableName(), bef->getTableDesc());
//
// bef->copyTopNode(result, CmpCommon::statementHeap());
//
// // now set the group attributes of the result's top node
// result->selectionPred() = bef->selectionPred();
// result->setGroupAttr(before->getGroupAttr());
//
// return result;
//}
//