// blob: 58199cf9cbe38d18d35219077ed9608c6e7e6069 [file] [log] [blame]
/**********************************************************************
// @@@ START COPYRIGHT @@@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// @@@ END COPYRIGHT @@@
**********************************************************************/
/* -*-C++-*-
******************************************************************************
*
* File: EstLogProp.cpp
* Description: Estimated logical Properties
*
* Created: 10/12/94
* Language: C++
*
*
*
*
******************************************************************************
*/
// -----------------------------------------------------------------------
#include "Sqlcomp.h"
#include "EstLogProp.h"
#include "GroupAttr.h"
#include "VEGTable.h"
#include "ItemOther.h"
#include "RelUpdate.h"
#include "opt.h"
/////////////////////
#include "Analyzer.h"
#include "AppliedStatMan.h"
/////////////////////
// Definition of the static member EstLogProp::counter_, a pointer to an
// ObjectCounter that starts out as 0 (null). The constructors and the
// destructor in this file dereference it to increment/decrement the count.
// NOTE(review): nothing in this file points counter_ at a real
// ObjectCounter -- confirm it is set elsewhere before the first EstLogProp
// is constructed. THREAD_P is a project macro (presumably a thread-local
// storage qualifier -- TODO confirm).
THREAD_P ObjectCounter (*EstLogProp::counter_)(0);
// -----------------------------------------------------------------------
// Main constructor.
//
//   card             initial value for resultCardinality_
//   preds            optional; when non-NULL its contents are copied into
//                    unresolvedPreds_
//   inputForSemiTSJ  stored in inputForSemiTSJ_
//   nodeSet          stored as-is (pointer copy) in nodeSet_
//   cacheable        stored in cacheable_
//   h                heap handed to the columnStats_ list
//
// maxCardinality_ starts at -1 and isCardinalityEqOne_ at FALSE. The
// object counter is bumped once per constructed instance.
// -----------------------------------------------------------------------
EstLogProp::EstLogProp(CostScalar card,
                       ValueIdSet *preds,
                       SemiTSJEnum inputForSemiTSJ,
                       CANodeIdSet *nodeSet,
                       NABoolean cacheable,
                       NAMemory * h)
  : resultCardinality_(card),
    maxCardinality_(static_cast<Cardinality>(-1)),
    columnStats_(h),
    inputForSemiTSJ_(inputForSemiTSJ),
    cacheable_(cacheable),
    isCardinalityEqOne_(FALSE)
{
  nodeSet_ = nodeSet;

  // Only copy the predicate set when the caller actually supplied one.
  if (preds)
  {
    unresolvedPreds_ = *preds;
  }

  counter_->incrementCounter();
}
// -----------------------------------------------------------------------
// Copy constructor with an explicit heap.
//
// columnStats_ is first constructed on heap 'h' and then assigned from
// other.columnStats_; every other member is copied from 'other'
// (nodeSet_ is a plain pointer copy). The object counter is bumped, and
// in debug builds the copied column statistics are checked for internal
// consistency.
// -----------------------------------------------------------------------
EstLogProp::EstLogProp(const EstLogProp &other, NAMemory * h)
  : columnStats_(h),
    isCardinalityEqOne_(other.isCardinalityEqOne_)
{
  // cardinalities
  resultCardinality_ = other.resultCardinality_;
  maxCardinality_    = other.maxCardinality_;

  // statistics and predicates
  columnStats_       = other.columnStats_;
  unresolvedPreds_   = other.unresolvedPreds_;

  // remaining scalar state
  nodeSet_           = other.nodeSet_;
  inputForSemiTSJ_   = other.inputForSemiTSJ_;
  cacheable_         = other.cacheable_;

  counter_->incrementCounter();

#ifndef NDEBUG
  columnStats_.verifyInternalConsistency(0, columnStats_.entries());
#endif
}
// -----------------------------------------------------------------------
// Copy assignment.
//
// Copies every data member of 'other' into this object (nodeSet_ is a
// plain pointer copy) and returns *this. The object counter is not
// touched, because no object is created or destroyed.
//
// Self-assignment (x = x) returns immediately: there is nothing to copy,
// and skipping the member-wise copy avoids relying on each member's own
// operator= being safe when source and target are the same object.
//
// In debug builds the resulting column statistics are checked for
// internal consistency.
// -----------------------------------------------------------------------
EstLogProp & EstLogProp::operator= (const EstLogProp &other)
{
  if (this == &other)
    return *this;

  resultCardinality_  = other.resultCardinality_;
  maxCardinality_     = other.maxCardinality_;
  columnStats_        = other.columnStats_;
  unresolvedPreds_    = other.unresolvedPreds_;
  inputForSemiTSJ_    = other.inputForSemiTSJ_;
  isCardinalityEqOne_ = other.isCardinalityEqOne_;
  nodeSet_            = other.nodeSet_;
  cacheable_          = other.cacheable_;

#ifndef NDEBUG
  columnStats_.verifyInternalConsistency(0, columnStats_.entries());
#endif

  return *this;
}
// -----------------------------------------------------------------------
// Destructor.
//
// When this object is flagged cacheable, has a node set, and query
// analysis is available and switched on, the AppliedStatMan (if any) is
// asked to drop its entry for this object, and nodeSet_ is reset to NULL.
// Afterwards the predicate set and the column statistics list are cleared
// and the object counter is decremented.
// -----------------------------------------------------------------------
EstLogProp::~EstLogProp()
{
  QueryAnalysis *analysis = QueryAnalysis::Instance();

  // Cheap checks first, so the common non-cached case stays inexpensive.
  const bool mayBeCached = cacheable_ &&
                           nodeSet_ &&
                           analysis &&
                           analysis->isAnalysisON();

  if (mayBeCached)
  {
    if (AppliedStatMan *statMan = analysis->getASM())
    {
      statMan->removeEntryIfThisObjectIsCached(this);
    }
    nodeSet_ = NULL;
  }

  unresolvedPreds_.clear();
  columnStats_.clear();

  counter_->decrementCounter();
}
// -----------------------------------------------------------------------
// EstLogProp::reconcile
//
// Currently disabled: always returns FALSE and modifies neither this
// object nor 'other' (which is therefore unused).
//
// The merge logic that previously sat after the unconditional return was
// unreachable and has been taken out of the executable body. It is kept
// here for reference only:
//
//   unresolvedPreds_  += other.unresolvedPreds_;
//   resultCardinality_ = (resultCardinality_ + other.resultCardinality_) / 2;
//
// i.e. union the unresolved predicates and average the two cardinalities.
// -----------------------------------------------------------------------
NABoolean EstLogProp::reconcile(const EstLogProp &other)
{
  return FALSE;
} // EstLogProp::reconcile
// -----------------------------------------------------------------------
// EstLogProp::compareEstLogProp
//
// Compares this object with 'other' and returns SAME or INCOMPATIBLE.
//
//  1. The very same object is always SAME.
//  2. If both sides carry a node set (i.e. the Query Analyzer created
//     nodeSet_ for both), the node sets alone decide the result.
//  3. Otherwise the result cardinalities must be equal, and either both
//     column statistics lists are empty ("empty" input logical
//     properties) or the column statistics, the unresolved predicates and
//     the semi-TSJ input flag must all be equal.
//
// There is deliberately no "close enough" heuristic on the cardinalities
// (such as a ratio within [0.8, 1.2]): that was incompatible with the
// Cascades assumption that, with pruning on, two different optimization
// contexts must never compare as SAME.
// -----------------------------------------------------------------------
COMPARE_RESULT EstLogProp::compareEstLogProp (const EstLogPropSharedPtr &other) const
{
  // identity
  if (this == other.get())
    return SAME;

  // node sets, when both are available, are authoritative
  if ((nodeSet_ != NULL) && (other->nodeSet_ != NULL))
    return ((*nodeSet_) == (*(other->nodeSet_))) ? SAME : INCOMPATIBLE;

  // fall back to comparing the properties themselves
  if (resultCardinality_ == other->resultCardinality_)
  {
    // two "empty" input logical properties
    if (columnStats_.entries() == 0 && other->columnStats_.entries() == 0)
      return SAME;

    if (columnStats_ == other->columnStats_ &&
        unresolvedPreds_ == other->unresolvedPreds_ &&
        inputForSemiTSJ_ == other->inputForSemiTSJ_)
      return SAME;
  }

  return INCOMPATIBLE;
}
// ---------------------------------------------------------------------
// Utility Routine: pickOutputs
//
// Fills this object's column statistics (colStats()) with those entries
// of 'columnStats' that are worth keeping for the group.
//
// An entry is kept when
//   - its VEG column matches the VEG column of one of the histograms in
//     inputEstLogProp (the histograms of the input data), or
//   - its VEG column is not NULL_VALUE_ID and it is contained in, or
//     referenced by, the set of columns requiring histograms
//     (specifiedOutputs plus the histogram columns of predSet), or it is
//     involved in a join and a constant, or
//   - query statistics are being shown (showQueryStats()).
//
// If nothing was kept and 'columnStats' is not empty, its first entry is
// inserted. The multi-column skewed value lists, the UEC list and the
// scan row count without hint are carried over from 'columnStats' too.
//
// When skew is incorporated in costing, an entry that is NOT kept (and is
// neither a virtual column for histograms nor an originally fake
// histogram) has its max frequency, total UEC and row count saved in the
// column's ColAnalysis, provided the column is referenced for histogram.
// ---------------------------------------------------------------------
void EstLogProp::pickOutputs( ColStatDescList & columnStats,
                              const EstLogPropSharedPtr& inputEstLogProp,
                              const ValueIdSet specifiedOutputs,
                              const ValueIdSet predSet)
{
  const ColStatDescList & outerStats = inputEstLogProp->getColStats();

  // Besides the specified outputs, histograms are wanted for columns the
  // predicates need: (i) predicates with a constant value or constant
  // expression, and (ii) columns joined to other columns that do not show
  // up as characteristic outputs -- these may be needed later. (Seen with
  // temporary tables created as normal tables by triggers.)
  ValueIdSet histogramCols = specifiedOutputs;
  histogramCols.addSet(predSet.getColumnsForHistogram());

  colStats().setMCSkewedValueLists(columnStats.getMCSkewedValueLists());

  NABoolean anyKept = FALSE;

  for (CollIndex idx = 0; idx < columnStats.entries(); idx++)
  {
    // Probably more than is needed, but the easiest way to carry along
    // all multi-column uec information that is required later.
    colStats().insertIntoUecList(columnStats.getUecList());
    colStats().setScanRowCountWithoutHint(columnStats.getScanRowCountWithoutHint());

    // Shallow inserts are sufficient below: from here on the ColStatDescs
    // describing the calling operator's output are treated as read-only.
    ColStatDescSharedPtr candidate = columnStats[idx];
    const ValueId vegCol = candidate->getVEGColumn();

    NABoolean kept = FALSE;

    // First chance: a direct match against the input histograms.
    for (CollIndex outerIdx = 0;
         !kept && outerIdx < outerStats.entries();
         outerIdx++)
    {
      if (vegCol == outerStats[outerIdx]->getVEGColumn() ||
          (CmpCommon::context()->showQueryStats()))
      {
        colStats().insert(candidate);
        kept = TRUE;
      }
    }

    // Second chance: no direct match, but the column may still be
    // required for histograms.
    if (!kept && (vegCol != NULL_VALUE_ID))
    {
      ValueId ignoredRef; // output of referencesTheGivenValue, not used
      if (histogramCols.contains(vegCol) ||
          histogramCols.referencesTheGivenValue(vegCol, ignoredRef) ||
          vegCol.isInvolvedInJoinAndConst() ||
          CmpCommon::context()->showQueryStats())
      {
        colStats().insert(candidate);
        kept = TRUE;
      }
    }

    if (kept)
      anyKept = TRUE;

    if (CURRSTMT_OPTDEFAULTS->incorporateSkewInCosting())
    {
      // A column referenced for histogram but not needed past this point
      // gets its max frequency saved; costing may use it later if the
      // column is part of the partitioning key.
      ColStatsSharedPtr stats = candidate->getColStats();
      if (!(stats->isVirtualColForHist()) && !kept &&
          !(stats->isOrigFakeHist()))
      {
        const ValueId plainCol = candidate->getColumn();
        ColAnalysis * analysis = plainCol.colAnalysis();
        if (analysis)
        {
          NAColumn * firstStatCol = stats->getStatColumns()[0];
          if (firstStatCol->isReferencedForHistogram())
          {
            CostScalar maxFrequency = columnStats.getMaxFreq(vegCol);
            analysis->setMaxFreq(maxFrequency);
            analysis->setFinalUec(stats->getTotalUec());
            analysis->setFinalRC(stats->getRowcount());
          }
        }
      }
    }
  } // for each entry of columnStats

  // Never leave the result empty when there was something to choose from.
  if (!anyKept && columnStats.entries() > 0)
    colStats().insert(columnStats[0]);
} // pickOutputs
// -----------------------------------------------------------------
// pickOutputsForUpdate
//
// Maps the column statistics of my child to mine, in two steps:
//   1. rewrite the update expression's outputs down through the
//      update-to-select map and let pickOutputs() choose the column
//      statistics for those mapped outputs;
//   2. let mapOutputsForUpdate() re-express the chosen statistics in
//      terms of the update's own columns.
//
// 'relExpr' is treated as a GenericUpdate (unchecked cast).
// -----------------------------------------------------------------
void EstLogProp::pickOutputsForUpdate( ColStatDescList colStatsFromScan,
                                       const EstLogPropSharedPtr& inputEstLogProp,
                                       const RelExpr & relExpr,
                                       const ValueIdSet updateExprOutputs,
                                       const ValueIdSet predSet)
{
  GenericUpdate & update = (GenericUpdate &) relExpr;
  ValueIdMap & toSelectMap = update.updateToSelectMap();

  // Translate to my child's outputs (or the right child's outputs, in
  // case I am a leaf) so that the appropriate colStats can be found.
  ValueIdSet selectSideOutputs;
  toSelectMap.rewriteValueIdSetDown(updateExprOutputs, selectSideOutputs);

  pickOutputs(colStatsFromScan, inputEstLogProp, selectSideOutputs, predSet);

  // For each picked colStat, find the matching column of the insert and
  // create a colStat for it.
  mapOutputsForUpdate(update, toSelectMap);
}
// ------------------------------------------------------------------------------
// create my colStats based on my child's output, by converting the columns to
// that of mine
// ------------------------------------------------------------------------------
void EstLogProp::mapOutputsForUpdate(const GenericUpdate & updateExpr,
const ValueIdMap & updateSelectValueIdMap)
{
TableDesc * updateTable = updateExpr.getTableDesc();
for ( CollIndex i = 0; i < colStats().entries(); i++ )
{
ColStatDescSharedPtr colStatPtr = (colStats())[i];
const ValueId columnId = colStatPtr->getVEGColumn();
ValueId updateColVEGOutputId;
updateSelectValueIdMap.mapValueIdUp(updateColVEGOutputId, columnId);
ValueId updateBaseColumnId;
if (updateColVEGOutputId != columnId)
{
updateBaseColumnId = updateColVEGOutputId;
ValueIdSet baseColumns;
updateColVEGOutputId.getItemExpr()->findAll( ITM_BASECOLUMN, baseColumns, TRUE, TRUE );
// from all the columns extracted, get the one for Insert table
TableDesc * thisTable = NULL;
for (ValueId column = baseColumns.init(); baseColumns.next(column);
baseColumns.advance(column) )
{
ItemExpr * columnExpr = column.getItemExpr();
thisTable = ((BaseColumn *)columnExpr)->getTableDesc();
if (thisTable == updateTable)
{
// set my column as the base column
updateBaseColumnId = column;
break;
}
}
ColStatsSharedPtr inColStats = colStatPtr->getColStats();
ColStatsSharedPtr colStatsForUpdate(new (STMTHEAP) ColStats (*inColStats,STMTHEAP));
colStatsForUpdate->setStatColumn(updateBaseColumnId.getNAColumn());
// use this ColStat to generate new ColStat corresponding to the char output
// of the Update expression
ColStatDescSharedPtr colStatDescForUpdate(new (STMTHEAP) ColStatDesc(colStatsForUpdate,
updateBaseColumnId, // ValueId of the column that will be used
// as a column name, VEG and mergeStats
STMTHEAP), STMTHEAP);
colStatDescForUpdate->VEGColumn() = updateColVEGOutputId;
colStatDescForUpdate->mergeState().clear() ;
colStatDescForUpdate->mergeState().insert(updateBaseColumnId);
// Remove the old colStat and insert this colStat into the result colStatDescList
colStats().removeAt( i );
colStats().insertDeepCopyAt(i, colStatDescForUpdate, // colStats to be copied
1, // scale
FALSE);
}
}
}
// -------------------------------------------------------------------
// EstLogProp::getCardOfBusiestStream
//
// Returns the cardinality of the busiest stream for the given
// partitioning function.
//
// With no histograms available, or when 'groupAttr' is given and reports
// itself as probe cacheable, the answer is simply
// resultCardinality / numOfParts, passed through minCsOne().
// Otherwise the computation is delegated to the column statistics list;
// the nested-join variant additionally receives this object's node set.
// -------------------------------------------------------------------
CostScalar
EstLogProp::getCardOfBusiestStream(const PartitioningFunction* partFunc,
                                   Lng32 numOfParts,
                                   GroupAttributes * groupAttr,
                                   Lng32 countOfCPUs,
                                   NABoolean isUnderNestedJoin)
{
  ColStatDescList &histograms = this->colStats();

  // Uniform-distribution fallback.
  if ((histograms.entries() == 0) ||
      (groupAttr && groupAttr->getIsProbeCacheable()))
  {
    return ( getResultCardinality() / numOfParts).minCsOne();
  }

  CostScalar busiestStreamCard;

  if (isUnderNestedJoin)
  {
    CANodeIdSet* outerNodes = getNodeSet();
    busiestStreamCard = histograms.getCardOfBusiestStreamForUnderNJ(
                            outerNodes,
                            partFunc,
                            numOfParts,
                            groupAttr,
                            countOfCPUs);
  }
  else
  {
    busiestStreamCard = histograms.getCardOfBusiestStream(
                            partFunc,
                            numOfParts,
                            groupAttr,
                            countOfCPUs);
  }

  return busiestStreamCard;
} // EstLogProp::getCardOfBusiestStream
// -----------------------------------------------------------------------
// EstLogProp::print
//
// The body is empty: nothing is written to 'ofd', and 'prefix' and
// 'suffix' are ignored.
// -----------------------------------------------------------------------
void EstLogProp::print(FILE* ofd, const char* prefix, const char *suffix) const
{
} // EstLogProp::print()
// -----------------------------------------------------------------------
// Equality: TRUE exactly when compareEstLogProp() reports SAME for
// 'other', FALSE otherwise.
//
// NOTE(review): compareEstLogProp() takes a const EstLogPropSharedPtr &,
// so '&other' is implicitly converted to a temporary shared pointer here.
// Confirm that destroying that temporary cannot release 'other'.
// -----------------------------------------------------------------------
NABoolean EstLogProp::operator == (const EstLogProp & other) const
{
  return (compareEstLogProp(&other) == SAME) ? TRUE : FALSE;
}