| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| #ifndef COLSTATDESC_H |
| #define COLSTATDESC_H |
| /* -*-C++-*- |
| ***************************************************************************** |
| * |
| * File: ColStatDesc.h |
| * Description: This file contains the declaration for ColStatDesc - |
| * the descriptor for the ColStats (column statistics) |
| * structure. |
| * |
| * Created: June 1, 1995 |
| * Language: C++ |
| * |
| * |
| * |
| * |
| ***************************************************************************** |
| */ |
| |
| // ----------------------------------------------------------------------- |
| |
| #include "Stats.h" /* includes CostScalar.h, Collections.h, ValueDesc.h ... */ |
| #include "NATable.h" |
| #include "SharedPtr.h" |
| #include "SharedPtrCollections.h" |
| |
| // ----------------------------------------------------------------------- |
| // Contents of this file |
| // ----------------------------------------------------------------------- |
| class MultiColumnUecList; |
| class ColStatDesc ; |
| class ColStatDescList; |
| |
| // forward references only -- these classes are NOT defined in this file! |
| class TableDesc; |
| class SelectivityHint; |
| class CardinalityHint; |
| class Join; |
| |
| // useful type definitions we use here & in PartKeyDist.[h cpp] |
| typedef LIST(EncodedValue) EncodedValueList ; |
| typedef SharedPtr<ColStatDesc> ColStatDescSharedPtr; |
| |
| |
| |
| // ----------------------------------------------------------------------- |
| // Multi-column uec list |
| // |
| // An association list of <list of table column,uec-count>. |
| // |
| // We maintain multi-column uec information in order to accurately |
| // estimate rowcounts for joins involving multiple predicates, the output |
| // of a groupby aggregate, and possibly other cases not listed here. |
| // This class is a data member of StatsList; the ColStatDescLists |
| // created from the StatsList have read-only access to the one copy |
| // that's maintained for all ColStatDescs. |
| // ----------------------------------------------------------------------- |
| |
| #define MultiColumnUecListIterator NAHashDictionaryIterator<ValueIdSet,CostScalar> |
| |
| class MultiColumnUecList : public HASHDICTIONARY(ValueIdSet,CostScalar) |
| { |
| public: |
| static ULng32 HashFunction (const ValueIdSet & input) ; |
| |
| MultiColumnUecList (const StatsList & initStats, |
| const ValueIdList & tableColumns ) ; |
| |
| virtual ~MultiColumnUecList() {} ; |
| |
| // given a ValueIdSet of table columns, returns the stored groupUec (if |
| // it exists), else returns csMinusOne |
| CostScalar lookup (const ValueIdSet & key) const ; |
| |
| void initializeMCUecForUniqueIndxes(TableDesc &table, |
| const CostScalar & tableRowcount); |
| |
| // ----------------------------------------------------------------------- |
| // useMCUecForCorrPreds |
| // |
| // used to calculate an adjustment in the case of multiple predicates being |
| // applied to highly correlated table columns |
| // (fn useMultiUecIfCorrelatedPreds(), subr of |
| // fn estimateCardinality() ) |
| // |
| // given a list of <ValueId, CostScalar> pairs representing all of the |
| // histograms which have been reduced, and the amount (reduction factor) |
| // they've been reduced, return TRUE if, in the list of these |
| // predicates, there are 2+ from the same table for which we have |
| // multi-column uec information and which are "highly correlated" |
| // (defined below); otherwise return FALSE. |
| // |
| // If both of these conditions are met, then we supply a factor |
| // "reductionAdjustment" which should be applied to the current rowcount |
| // estimate in order to increase it beyond its current value, to take |
| // into account the fact that we are applying multiple predicates to |
| // highly correlated columns, which we assume means that beyond the |
| // most selective predicate, the additional predicates are redundant |
| // in part or whole (i.e., they remove the "same rows" as the other |
| // predicates). |
| // |
| // "numPredicates", another parameter, is specified by the calling |
| // routine to let this routine know how many histograms total have been |
| // altered inside of the estimateCardinality() routine. This value is |
| // used as an upper bound for the computation of how many columns in a |
| // single table have had predicates applied to them (in an attempt to |
| // avoid creating too large of an estimate in cases where we join |
| // together columns of the same table). |
| // |
| NABoolean useMCUecForCorrPreds ( |
| NAHashDictionary<ValueId, CostScalar> & predReductions, /* in/mod */ |
| const CollIndex numPredicates, /* in */ |
| const CostScalar& oldRowCount, /* in */ |
| const CostScalar& newRowCount, /* in */ |
| NABoolean largeTabStatsNeeded, /* in */ |
| const ColStatDescList & source, /* in */ |
| CostScalar & reductionAdjustment) ; /* out */ |
| |
| // ----------------------------------------------------------------------- |
| // getUecForMCJoin |
| // |
| // used by multi-column join code (fn useMultiUecIfMultipleJoins(), subr of |
| // fn estimateCardinality() ) |
| // |
| // given a list of ValueIdLists representing the two (or more) join |
| // predicates between (hopefully) two tables, return TRUE if we |
| // have the necessary multi-column uec information about some of the |
| // columns involved in this join (FALSE otherwise); return the columns we |
| // don't have MC info for, and return this multi-column uec number. |
| // we also supply a boolean flag "largeTableNeedsStats" which is the |
| // ColStats flag "isUpStatsNeeded" -- this helps decide whether to fire |
| // off a 6007-warning in the case where the best possible multi-column |
| // stats don't exist |
| // |
| // NOTE(review): the remaining parameters are not described here; from the |
| // signature, 'expr' and 'colStats' are read-only inputs and 'redFromSC' |
| // defaults to csMinusOne -- confirm their exact meaning against the |
| // definition of this method |
| // |
| // i.e., if we do |
| // "sel * from T1,T2 where T1.a=T2.b AND T1.c=T2.d", |
| // we need MC-info on (T1.a,T1.c) and (T2.b,T2.d) -- if this exists, |
| // then return TRUE and set maxMultiColUec to be the larger of the two |
| // corresponding multi-column uec values for (T1.a,T1.c) & (T2.b, T2.d) |
| // |
| // as a more general case, if we do |
| // "sel * from T1,T2 where T1.c1=T2.c1 AND T1.c2=T2.c2 AND ... T1.cn=T2.cn", |
| // then we want to return the largest ValueIdSets (t1.1,...,t1.m) (t2.1,...t2.m) |
| // such that there is exactly one t2.1 for every t1.1 -- any remaining ValueIdSets |
| // in joinValueIdPairs are returned to the calling function, which will have to |
| // apply single-column selectivity for them. |
| // |
| NABoolean getUecForMCJoin (LIST(ValueIdList) & joinValueIdPairs, /* in/out */ |
| const NABoolean largeTableNeedsStats, /* in */ |
| const Join * expr, /* in */ |
| CostScalar & prodMaxInitUec, /* out */ |
| CostScalar & maxMultiColUec, /* out */ |
| CostScalar & baseRCForMaxMCUEC, /* out */ |
| CostScalar & leftMCUec, /* out */ |
| NABoolean & checkForLowBound, /* out */ |
| NABoolean & joinOnUnique, /* presumably out -- TODO confirm */ |
| const ColStatDescList & colStats, /* in */ |
| CostScalar redFromSC=csMinusOne); /* in */ |
| |
| //------------------------------------------------------------------ |
| // Determine if the join predicates consist of column sets where one |
| // of the sides is unique and shares relationship "similar" to PK/FK |
| // with the other side. If so, return the cardinality of non-unique |
| // side. This feature is OFF by default if there is more than one |
| // joining column. It'll be controlled by COMP_BOOL_149. |
| //------------------------------------------------------------------ |
| CostScalar getRowcountOfNonUniqueColSet(const Join *expr, |
| ValueIdList lhsColList, |
| ValueIdList rhsColList, |
| NABoolean leftUnique, |
| NABoolean rightUnique); |
| |
| // -------------------------------------------------------------- |
| // display missing stats warning. The warning is displayed based |
| // on the CQDs: |
| // HIST_MISSING_STATS_WARNING_LEVEL - The CQD has 5 values |
| // It is used to control the number of missing stats warnings |
| // that should be generated. |
| // 0: Display no warnings. |
| // 1: Display only missing single column stats warnings. These include 6008 and 6011 |
| // 2: Display all single column missing stats warnings and |
| // multi-column missing stats warnings for Scans only. |
| // 3: Display all missing single column stats warnings and missing |
| // multi-column stats warnings for Scans and GroupBy operators only. |
| // 4: Display all missing single column stats and missing multi-column |
| // stats warnings for all operators including Scans, Joins and groupBys. |
| // The CQD does not have an impact on the auto update stats behavior. The stats will |
| // still be automatically generated even if the warnings have been suppressed. |
| // Default behavior is to generate all warnings |
| // -------------------------------------------------------------- |
| void |
| displayMissingStatsWarning(TableDesc * mostRefdTable, |
| ValueIdSet predCols, |
| NABoolean largeTableNeedsStats, |
| NABoolean displayWarning, |
| const ColStatDescList & colStats, |
| CostScalar redFromSC = csMinusOne, |
| NABoolean quickStats = FALSE, |
| OperatorTypeEnum op = REL_SCAN) const; |
| |
| // ----------------------------------------------------------------- |
| // isMCStatsUseful is used to determine if there is any possibility |
| // of optimizer benefiting from multi-column stats. The MC stats |
| // are said to be not helpful, if any subset of given column set |
| // is orthogonal. More heuristics can be added later to determine |
| // usefulness of MC stats |
| // ----------------------------------------------------------------- |
| NABoolean |
| isMCStatsUseful(ValueIdSet columnSet, |
| TableDesc * tableDesc) const; |
| |
| // ------------------------------------------------------------------ |
| // Combine MC UEC of subset of columns from columns with reduction to get |
| // MC UEC of larger set |
| // ------------------------------------------------------------------ |
| |
| NABoolean createMCStatsForColumnSet(ValueIdSet colsWithReduction, |
| ValueIdSet & cumulativeColSetWithMCUEC, |
| CostScalar & maxMultiColUec, |
| CostScalar baseRowCount |
| ); |
| // ----------------------------------------------------------------------- |
| // findMatchingColumns |
| // |
| // subroutine of getUecForMCJoin; used to find correspondences between |
| // lists of table columns w.r.t. the join predicates |
| // ----------------------------------------------------------------------- |
| ValueIdSet findMatchingColumns (const ValueIdSet & t1Cols, /* in */ |
| const LIST(ValueIdList) & joinPairs, /* in */ |
| LIST(ValueIdList) & remainingPairs, /* out */ |
| CostScalar & maxInitUecProduct, /* out */ |
| CostScalar & minInitUecProduct, /* out */ |
| NABoolean & checkForLowBound /* out */ |
| ) const ; |
| |
| // ----------------------------------------------------------------------- |
| // largestSubset |
| // |
| // used by multi-column group by code (GroupbyAgg::synthEstLogProp) |
| // |
| // given a set of table columns we're interested in, returns a set |
| // representing the largest subset of the input set for which we have |
| // multicolumn uec information |
| // |
| // if there are ties, return the one with largest correlation |
| ValueIdSet largestSubset (const ValueIdSet & columns) const ; |
| |
| |
| |
| //--------------------------------------------------------------------- |
| //MultiColumnUecList::getListOfSubsetsContainsColumns |
| // |
| //Input: columnList |
| //Output: List of ValueIdSet that contains the last column in the list |
| //and other columns from the columnList only |
| //Constraints: ColumnId that is passed in can be VegRef that contains |
| // the base id for that column at the first level or it can |
| // be the id corresponding to an index on the table. |
| //--------------------------------------------------------------------- |
| LIST(ValueIdSet) * getListOfSubsetsContainsColumns( |
| const ValueIdList & columns, |
| LIST(CostScalar)& uecCount |
| ) const; |
| |
| //--------------------------------------------------------------------- |
| //MultiColumnUecList::findDenom |
| // |
| //Input: list of columns |
| //Output: Boolean. TRUE if there is a multi-column histogram exactly |
| //matching the input, FALSE otherwise |
| //Constraints: ValueIds for the columns need to be base valueIds. |
| //--------------------------------------------------------------------- |
| NABoolean findDenom(const ValueIdSet & columns)const; |
| |
| // add the <table-column-valueidset, uec-value> pairs from OTHER into |
| // THIS (the ones that aren't already there) |
| void insertList (const MultiColumnUecList * other) ; |
| |
| void insertMappedList(const MultiColumnUecList *other, |
| const ValueIdMap &map); // map is used in "up" direction |
| |
| // this routine answers whether there's any bona fide multi-column |
| // information contained in this list -- i.e., any |
| // <valueidset,costscalar> pairs where the valueidset has more than one |
| // entry |
| NABoolean containsMCinfo() const ; |
| |
| // display the contents of the MultiColumnUecList |
| void print (FILE *f = stdout, |
| const char * prefix = DEFAULT_INDENT, |
| const char * suffix = "") const ; |
| void display () const ; |
| // inserts a <table-column-valueidset, uec-value> pair; |
| // returns TRUE if successful, FALSE if not successful (e.g., already exists) |
| // ==> was private (no one should ever change this object) but was made |
| // public to handle updateMCUecForUniqueIndexes |
| |
| NABoolean insertPair (const ValueIdSet & key, const CostScalar & groupUec) ; |
| |
| |
| // updates groupUec for columns in the Multi-column Uec list |
| // This will be used only if no multi-col uec exists for the unique |
| // index or if the uec for unique index is not equal to the row count |
| // as it should be |
| |
| NABoolean updatePair (const ValueIdSet & columns, |
| const CostScalar & groupUec); |
| |
| // Following method creates multi-col UEC for larger set of columns |
| // using partial overlapping multi-col UECs. |
| // For example, if MC-UEC available - (a, b, c) (c, d). |
| // Then MC (a, b, c, d) = MC (a, b, c) * MC (c, d) / MC (c) |
| NABoolean createMCUECWithOverlappingColSets(ValueIdSet & remainingCols, |
| ValueIdSet & cumulativeColSetWithMCUEC, |
| CostScalar & multiColUec, |
| CostScalar oldRowcount); |
| |
| |
| // Following method creates multi-col UEC for larger set of columns |
| // using partial disjoint multi-col UECs. |
| // For example, if MC-UEC available - (a, b) (c, d). |
| // Then MC (a, b, c, d) = MC (a, b) * MC (c, d) |
| NABoolean createMCUECWithDisjointColSets(ValueIdSet & remainingCols, |
| ValueIdSet & cumulativeColSetWithMCUEC, |
| CostScalar & multiColUec, |
| CostScalar oldRowCount); |
| |
| // In the following method, we shall create a new MC list. |
| // This list contains MC-UEC for only those column sets which include |
| // all columns of colsWithReductions and at most one column from |
| // cumulativeColSetWithMCUEC |
| |
| MultiColumnUecList * createMCListForRemainingCols( |
| ValueIdSet colsWithReductions, |
| ValueIdSet cumulativeColSetWithMCUEC); |
| |
| MultiColumnUecList () ; // added 05/23/05. |
| |
| private: |
| |
| // this class should never be copied, so the copy constructor is private. |
| // The default constructor used to be private as well; it was made public |
| // because we need to create a temporary MultiColumnUecList |
| // in the method createMCListForRemainingCols - 05/23/05 |
| // MultiColumnUecList () ; |
| MultiColumnUecList (const MultiColumnUecList & other) ; |
| }; |
| |
| // Shorthand for the iterator over the <ValueIdList, MCSkewedValueList> |
| // entries of a MultiColumnSkewedValueLists dictionary. |
| #define MultiColumnSkewedValueListsIterator NAHashDictionaryIterator<ValueIdList,MCSkewedValueList> |
| |
| // ----------------------------------------------------------------------- |
| // Multi-column skewed value lists |
| // |
| // A hash dictionary that associates a ValueIdList key with an |
| // MCSkewedValueList value. |
| // ----------------------------------------------------------------------- |
| class MultiColumnSkewedValueLists : public HASHDICTIONARY(ValueIdList,MCSkewedValueList) |
| { |
| public: |
| |
| // ---- construction / destruction ---- |
| MultiColumnSkewedValueLists () ; |
| |
| MultiColumnSkewedValueLists (const StatsList & initStats, const ValueIdList & tableColumns) ; |
| |
| virtual ~MultiColumnSkewedValueLists() {} |
| |
| // ---- hashing of the dictionary key ---- |
| static ULng32 HashFunction (const ValueIdList & input) ; |
| |
| // ---- lookup ---- |
| // Retrieves the skew values if they are found; returns NULL otherwise. |
| const MCSkewedValueList* getMCSkewedValueList (ValueIdSet colSet, |
| ValueIdList & colGroup) ; |
| |
| private: |
| |
| // the copy constructor is private, so clients cannot copy this object |
| MultiColumnSkewedValueLists (const MultiColumnSkewedValueLists & other) ; |
| }; |
| |
| // ----------------------------------------------------------------------- |
| // A column statistics descriptor contains a valueid for the column |
| // that makes up this column statistics object, as well as a pointer |
| // to the ColStats structure. |
| // ----------------------------------------------------------------------- |
| class ColStatDesc : public NABasicObject |
| { |
| friend class TableDesc; |
| |
| protected: |
| |
| // copy method: takes over the state of 'other'; used by the copy |
| // constructor and by operator= |
| void copy (const ColStatDesc& other) ; |
| |
| // deallocate method: used by the destructor and by operator= (before the |
| // new state is copied in) |
| void deallocate () ; |
| |
| public: |
| |
| // the following enum is used only in synchronizeStats() |
| enum SynchSpecialFlag { DO_NOTHING_SPECIAL, DO_NOT_REDUCE_UEC, SET_UEC_TO_ONE } ; |
| |
| // default constructor: no column, no ColStats, not modified, not from an |
| // inner table, input cardinality of 1. |
| // Every scalar member is initialized explicitly (in declaration order) so |
| // that isFromInnerTable() never reads an indeterminate value. |
| ColStatDesc (NAMemory * h=HISTHEAP) : |
| column_(), VEGcolumn_(), nonVegEquals_(h), fromInnerTable_(FALSE), |
| colStats_(NULL), modified_(FALSE), inputCard_(1.0) |
| { } |
| |
| // constructor |
| ColStatDesc (const ColStatsSharedPtr& stats, const ValueIdList& columnList, NAMemory * h=HISTHEAP) ; |
| |
| // constructor used to create a ColStatDesc for a generated column. |
| ColStatDesc (const ColStatsSharedPtr& stats, const ValueId & column, NAMemory * h=HISTHEAP) ; |
| |
| // destructor (not declared virtual in this class): releases the state |
| // via deallocate() |
| ~ColStatDesc() { deallocate(); } |
| |
| |
| // copy constructor: nonVegEquals_ is created on heap 'h', everything else |
| // is taken over by copy() |
| ColStatDesc (const ColStatDesc& other, NAMemory * h=HISTHEAP) : |
| nonVegEquals_(h) |
| { copy(other); } |
| |
| // assignment operator |
| inline ColStatDesc & operator= (const ColStatDesc& other) |
| { |
| if ( &other != this ) // support a=a |
| { deallocate(); copy(other); } |
| return *this; |
| } |
| |
| // comparison operator: two descriptors compare equal when their column_ |
| // ValueIds are equal; no other member is compared |
| NABoolean operator== (const ColStatDesc& other) const |
| { return (column_ == other.column_) ; } |
| |
| // accessor functions (all const) |
| inline const ValueId & getColumn () const { return column_; } |
| inline const ValueId & getVEGColumn () const { return VEGcolumn_; } |
| inline const ColStatsSharedPtr getColStats () const { return colStats_; } |
| inline NABoolean isModified () const { return modified_; } |
| inline NABoolean isFromInnerTable () const { return fromInnerTable_; } |
| |
| inline const ValueIdSet & getMergeState() const { return mergeState_; } |
| inline const SHPTR_LIST(ColStatDescSharedPtr)& |
| getNonVegEquals() const { return nonVegEquals_; } |
| |
| inline const ValueIdSet & getAppliedPreds () const { return appliedPreds_; } |
| inline NABoolean isPredicateApplied (const ValueId & newPredicate) const |
| { return appliedPreds_.contains( newPredicate ); } |
| NABoolean isSimilarPredicateApplied ( const OperatorTypeEnum op ) const; |
| |
| NABoolean derivOfLikeAndSimilarPredApp(const ItemExpr * pred ) ; |
| |
| CostScalar selForRelativeRange (const OperatorTypeEnum op, |
| const ValueId & column, |
| ItemExpr *newPred) const; |
| |
| |
| // these accessor functions return modifiable access to the private data |
| // members |
| ColStatsSharedPtr getColStatsToModify () ; |
| inline ValueId & VEGColumn () { return VEGcolumn_; } |
| inline ValueIdSet & mergeState () { return mergeState_; } |
| inline SHPTR_LIST(ColStatDescSharedPtr)& nonVegEquals () { return nonVegEquals_; } |
| inline ValueIdSet & appliedPreds () { return appliedPreds_; } |
| |
| // manipulation functions |
| inline void setModified (NABoolean flag=TRUE) { modified_ = flag; } |
| inline void setFromInnerTable (NABoolean flag=TRUE) { fromInnerTable_ = flag; } |
| inline void setColStats (const ColStatsSharedPtr& stats) { colStats_ = stats; } |
| |
| inline void addToAppliedPreds (const ValueId & newPredicate) |
| { appliedPreds_.insert( newPredicate ); } |
| inline void removeFromAppliedPreds (const ValueId & newPredicate) |
| { appliedPreds_.remove( newPredicate ); } |
| |
| // apply the following selectivity to the column statistics |
| void applySel (const CostScalar & selectivity) ; |
| |
| void applySelIfSpecifiedViaHint(ItemExpr * pred, const CostScalar & oldRowcount); |
| |
| // input cardinality recorded for this ColStatDesc (1.0 after default |
| // construction) |
| void setInputCard (CostScalar rows) {inputCard_ = rows; } |
| |
| // read-only, hence const like the other accessors above |
| CostScalar getInputCard() const { return inputCard_; } |
| |
| void mapUpAndCopy (const ColStatDesc& other, ValueIdMap &map) ; |
| |
| // synchronize/map the RowCount and UEC change of one set of aggregate |
| // statistics with the current set of aggregate statistics |
| void synchronizeStats (const CostScalar & baseRowcount, |
| const CostScalar & newRowcount, |
| SynchSpecialFlag=DO_NOTHING_SPECIAL) ; |
| |
| // modify statistics by applying the effect of the provided predicate |
| NABoolean modifyStats (ItemExpr *pred, CostScalar &newRowcount, |
| CostScalar *maxSelectivity=NULL); |
| |
| // merge two ColStatDescs from the same table |
| NABoolean mergeColStatDescOfSameTable(ColStatDescSharedPtr &rightColStats, |
| OperatorTypeEnum opType = ITM_FIRST_ITEM_OP); |
| |
| // merge two ColStatDesc's |
| void mergeColStatDesc(ColStatDescSharedPtr& mergedStatDesc, |
| MergeType mergeMethod, |
| NABoolean forceMerge = FALSE, |
| OperatorTypeEnum opType = ITM_FIRST_ITEM_OP, |
| NABoolean mergeFVs=TRUE) ; |
| |
| // ----------------------------------------------------------------------- |
| // Reduce uec by correct amount, instead of what we've done in the past. |
| // ----------------------------------------------------------------------- |
| static CostScalar |
| calculateCorrectResultUec (const CostScalar & baseRows, |
| const CostScalar & newRows, |
| const CostScalar & baseUec) ; |
| |
| // display the colStats_ inside the colstatdesc: |
| void print (FILE *f = stdout, |
| const char * prefix = DEFAULT_INDENT, |
| const char * suffix = "", |
| CollHeap *c=NULL, char *buf=NULL, |
| NABoolean hideDetail=FALSE) const ; |
| void display () const ; |
| |
| // ------------------------------------------------------------------------ |
| // The first four private members are worthy of a little discussion and |
| // clarification. |
| // |
| // 'column_' is the column for which these stats apply. |
| // |
| // 'VEGcolumn_' is the VEGRef id corresponding with column_ e.g., |
| // column_ is T2.a; If IT2.a is an index on T2.a, this VEG includes |
| // (IT2.a, T2.a), further if there exists a predicate T2.a=T2.b and |
| // IT3.b is an index on T2.b, the VEG includes (IT2.a, T2.a, IT3.b, |
| // T2.b). VEGcolumn_ may, over time, contain an instantiate_null |
| // operator when the associated column has become the output of an outer |
| // join, as well as a value_id_union map when the column is the output |
| // of a union. |
| // |
| // 'mergeState_' is a set associated with column_. Each set indicates |
| // which of the statistics from the matching VEGcolumn_ entry have been |
| // merged. Each set starts with only the ValueId from column_, but as |
| // VEG preds are applied, this set grows. This entry is necessary to |
| // support nested index joins, and as a side benefit prevent a VEGPred |
| // of the form a=a from doing anything to the statistics for 'a'. |
| // |
| // 'nonVegEquals_' tracks information regarding EQ-Joins applied to columns |
| // outside the 'normal' realm of VEG predicates. |
| // |
| // This, primarily, applies to equality predicates underneath an OR, |
| // which are not placed in a VEG. The information is used 2 ways: |
| // - For AND's beneath an OR, this list is used to provide transitivity; |
| // - Directly beneath OR's, this list indicates what ColStatDesc to |
| // update/recreate and add back into the containing ColStatDescList. |
| // ------------------------------------------------------------------------ |
| |
| private: |
| |
| // compress the ColStats for local predicates on a column |
| // The local predicates should involve a constant |
| // e.g. |
| // * t1.col1 = 3 |
| // * t1.col1 < 3 |
| // * t1.col1 > 1 |
| // * t1.col1 > 1 and t1.col1 < 3 |
| // This simply forwards to colStats_->compressColStatsForQueryPreds(); |
| // colStats_ is dereferenced without a check, so it must have been set. |
| void compressColStatsForQueryPreds(ItemExpr * lowerBound, |
| ItemExpr * upperBound, |
| NABoolean hasJoinPred = FALSE) |
| { colStats_->compressColStatsForQueryPreds(lowerBound, upperBound, hasJoinPred); } |
| |
| // identify the base table column(s) of which these statistics is/are |
| // comprised |
| ValueId column_; |
| // identify the equivalent VEG (corresponding to base table columns) of |
| // which these stats are comprised |
| ValueId VEGcolumn_; |
| // indicate which histograms have been merged (so far) |
| ValueIdSet mergeState_; |
| // pointers to ColStatDescs that were EQ-merged to create this ColStatDesc, |
| // even though they were not both contained in a single VEG. |
| SHPTR_LIST(ColStatDescSharedPtr) |
| nonVegEquals_; |
| // used in nonVegEquals_ related OR processing |
| NABoolean fromInnerTable_; |
| // All Predicates applied to this ColStats |
| ValueIdSet appliedPreds_; |
| // reference to ColStats structure |
| ColStatsSharedPtr colStats_; |
| // FALSE => the Colstats structure has not yet been modified |
| NABoolean modified_; |
| // any input cardinality which could be reflected in this colStat |
| CostScalar inputCard_; |
| }; |
| |
| // ----------------------------------------------------------------------- |
| // |
| // class ColStatDescList : a LIST of ColStatDescSharedPtr's |
| // |
| // Since a ColStatDesc is the histogram for a single table column, a CSDL |
| // is the histograms modelling the columns for an entire table (or node in |
| // a query plan, i.e., a "virtual table", one produced from multiple |
| // tables joined together) |
| // |
| // ----------------------------------------------------------------------- |
| |
| |
| class ColStatDescList: public SHPTR_LIST (ColStatDescSharedPtr) |
| { |
| public: |
| |
| ColStatDescList (NAMemory* h=0) : SHPTR_LIST(ColStatDescSharedPtr)(h), uecList_(NULL), |
| useCapForLowBound_ (FALSE), |
| joinOnSingleCol_(FALSE), |
| scanRowCountWithoutHint_ (-1.0), |
| mcSkewedValueLists_(NULL) |
| {} |
| |
| ColStatDescList (const ColStatDescList & other, NAMemory* h/*=0*/) : |
| SHPTR_LIST(ColStatDescSharedPtr)(other,h), |
| uecList_(other.uecList_), |
| useCapForLowBound_ (other.useCapForLowBound_), |
| joinOnSingleCol_ (other.joinOnSingleCol_), |
| scanRowCountWithoutHint_(other.scanRowCountWithoutHint_), |
| joinedCols_(other.joinedCols_), |
| mcSkewedValueLists_(other.mcSkewedValueLists_) |
| {} |
| |
| // should the destructor do anything? for now, no |
| virtual ~ColStatDescList () {} |
| |
| // Returns TRUE if at least one of the histograms is a |
| // fake histogram. A fake histogram is a histogram that |
| // was synthesized by ColStats from info. other than statistics. |
| // We need to test for this because we don't want to cost |
| // MDAM relying on fake histograms. MDAM must not be chosen |
| // when there are fake histograms. |
| NABoolean containsAtLeastOneFake() const; |
| |
| NABoolean selectivityHintApplied() const; |
| |
| // Returns TRUE if the given Column is contained in this |
| // ColStatDescList. The ColStatDesc could have also been merged |
| // with the other colStatDesc |
| |
| NABoolean contains(const ValueId & column) const; |
| |
| // Returns TRUE if the full column set is contained in this |
| // ColStatDescList. |
| |
| NABoolean contains(const ValueIdList & colList) const; |
| |
| // Returns the (possibly multi-column) uec value for the |
| // ValueId-specified column(s) in the parameter list. |
| // |
| // NB: in the case where we cannot find uec information for one or more |
| // of 'columns' (i.e., histogram isn't available), this method returns |
| // -1. Anyone using this method should check for this value! |
| CostScalar getAggregateUec (const ValueIdSet & columns) const ; |
| |
| // set base uec for all columns in their colAnalysis |
| void setBaseUecForAllCols(); |
| |
| void setScaleFactor(CostScalar val); |
| |
| // getColStatDescIndexForColWithMaxUec(leftColIndex, leftLeafValues) |
| // From the given ValueIdSet, the method returns the index of the histogram |
| // with max UEC |
| NABoolean getColStatDescIndexForColWithMaxUec(CollIndex & leftColIndex, |
| const ValueIdSet & leftLeafValues) const; |
| |
| void addToAppliedPredsOfAllCSDs(const ValueIdSet & colSet, |
| const ValueId & newPredicate); |
| |
| // some usages of CSDL want to be able to create and then delete individual |
| // CSDL's -- for these users, we have an explicit destroy function |
| void destroy () ; |
| |
| inline ColStatDescList & operator = (const ColStatDescList &other) |
| { |
| this->SHPTR_LIST(ColStatDescSharedPtr)::operator = ( other ); |
| uecList_ = other.uecList_ ; |
| mcSkewedValueLists_ = other.mcSkewedValueLists_; |
| return *this; |
| } |
| |
| // --------------------------------------------------------------------- |
| // Methods for doing Deep Copies of the ColStatDescs whose pointers are |
| // inserted into a ColStatDescList. |
| // In the various routines, |
| // 'firstN' specifies that only the first N entries in the source are to |
| // be inserted. |
| // 'scale' specifies the factor by which the RowCounts (not UECs) should |
| // be multiplied. |
| // 'shapeChangedMask' is AND'd with the current setting of the shape- |
| // changed flag, allowing it to be either left alone (the default) or |
| // cleared. |
| // --------------------------------------------------------------------- |
| void appendDeepCopy (const ColStatDescList & source, |
| const CollIndex firstN, |
| const CostScalar & scale = 1, |
| const NABoolean shapeChangedMask = TRUE) ; |
| |
| void makeDeepCopy (const ColStatDescList & source, |
| const CostScalar & scale = 1, |
| const NABoolean shapeChangedMask = TRUE) ; |
| |
| void prependDeepCopy (const ColStatDescList & source, |
| const CollIndex firstN, |
| const CostScalar & scale = 1, |
| const NABoolean shapeChangedMask = TRUE) ; |
| |
| void insertDeepCopy (const ColStatDescSharedPtr & source, |
| const CostScalar & scale = 1, |
| const NABoolean shapeChangedMask = TRUE) ; |
| |
| void insertDeepCopyAt (const CollIndex entry, |
| const ColStatDescSharedPtr & source, |
| const CostScalar & scale = 1, |
| const NABoolean shapeChangedMask = TRUE) ; |
| void makeMappedDeepCopy( |
| const ColStatDescList & source, |
| ValueIdMap &map, // map source "up" |
| NABoolean includeUnmappedColumns); |
| |
| void removeDeepCopyAt (const CollIndex entry) ; |
| |
| void computeMaxFreq(NABoolean forced = FALSE); |
| |
| // Adds a ColStatDesc for a virtual column to this ColStatDescList. |
| // This is used for cases like inserts, transpose or rowsets, where the |
| // column is being equated to a constant or the right child of the join |
| // is a constant. |
//   uec, rowCount : presumably the UEC and row count given to the new
//                   statistics -- TODO confirm against the implementation
//   colId, vegCol, mergeState, expr : meaning not visible in this header
//   defineVirtual : defaults to TRUE
| |
| void addColStatDescForVirtualCol(const CostScalar & uec, |
| const CostScalar & rowCount, |
| const ValueId colId, |
| const ValueId vegCol, |
| const ValueId mergeState, |
| const RelExpr * expr, |
| NABoolean defineVirtual = TRUE); |
| |
| // --------------------------------------------------------------------- |
| // Method for estimating the Cardinality, given a set of predicates |
| // and column statistics. |
| // |
| // estimateCardinality takes special actions when it is invoked against |
| // the children of ITM_AND and ITM_OR operators. |
//
// Parameters (direction inferred from const-ness of the declaration):
//   initalRowCount   (in)  row count before the predicates are applied
//                          (the name is a misspelling of "initial")
//   setOfPredicates  (in)  predicates to apply
//   outerReferences  (in)
//   expr             (in)  Join the estimate is made for, if any
//                          (presumably may be NULL -- TODO confirm)
//   selHint/cardHint (in)  user hints, see adjustRowcountWithHint()
//   numOuterColStats (in/out, non-const reference)
//   unresolvedPreds  (in/out, non-const reference)
//   mergeMethod      defaults to INNER_JOIN_MERGE
//   exprOpCode       defaults to ITM_FIRST_ITEM_OP, used as a no-op value
//   maxSelectivity   optional pointer, defaults to NULL
// Returns a CostScalar, presumably the estimated row count.
| // --------------------------------------------------------------------- |
| CostScalar estimateCardinality (const CostScalar & initalRowCount, |
| const ValueIdSet & setOfPredicates, |
| const ValueIdSet & outerReferences, |
| const Join * expr, |
| const SelectivityHint * selHint, |
| const CardinalityHint * cardHint, |
| CollIndex & numOuterColStats, |
| ValueIdSet & unresolvedPreds, |
| MergeType mergeMethod = |
| INNER_JOIN_MERGE, |
| OperatorTypeEnum exprOpCode = |
| ITM_FIRST_ITEM_OP , // no-op value |
| CostScalar *maxSelectivity=NULL); |
| |
| |
| // Adjusts the row count based on the cardinality / selectivity / count(*) hint. |
//   cardHint, selHint : the user-supplied hints
//   setOfPredicates   : predicates the hints relate to
//   newRowCount       : non-const reference, so it can be updated in place
//   initialRowCount   : row count before adjustment (read-only)
// NOTE(review): the relationship between the CostScalar return value and
// newRowCount is not visible from this declaration -- confirm.
| CostScalar adjustRowcountWithHint(const CardinalityHint * cardHint, |
| const SelectivityHint * selHint, |
| const ValueIdSet & setOfPredicates, |
| CostScalar & newRowCount, |
| const CostScalar & initialRowCount); |
| |
// Copies and scales the histograms of this list by 'scale' (per the name).
// NOTE(review): presumably 'scale' multiplies row counts as in the
// deep-copy routines -- confirm against the implementation.
| void copyAndScaleHistograms(CostScalar scale); |
| |
// NOTE(review): per the name, sets the base UEC of each member's
// statistics to its total UEC -- confirm against the implementation.
| void setBaseUecToTotalUec(); |
| |
// Returns the maximum frequency for column 'col' (per the name).
// NOTE(review): the result when 'col' has no statistics in this list is
// not visible here.
| CostScalar getMaxFreq(ValueId col); |
| |
// Returns the UEC for column 'col' (per the name).
// NOTE(review): the result when 'col' has no statistics in this list is
// not visible here.
| CostScalar getUEC(ValueId col); |
| |
| // get the largest of the max frequencies of the columns in the given column set |
| CostScalar getMaxOfMaxFreqOfCol(const ValueIdSet & baseColSet) ; |
| |
| // get the smallest of the max frequencies of the columns in the given column set |
| CostScalar getMinOfMaxFreqOfCol(const ValueIdSet & baseColSet) ; |
| |
| // get the max frequency of the leaves of a Case expression; |
// 'leafValues' is the set of leaf values of that expression
| CostScalar getMaxFreqForCaseExpr(const ValueIdSet & leafValues); |
| |
// Adds every column in 'newPredCols' to the joined-columns set.
| void addToJoinedCols (const ValueIdSet & newPredCols) |
| { |
| this->joinedCols_.insert(newPredCols); |
| } |
| |
// Empties the joined-columns set.
| void clearJoinedCols () |
| { |
| this->joinedCols_.clear(); |
| } |
| |
// Returns a copy of the joined-columns set. Const-qualified so that it can
// be called on a const ColStatDescList, like the other read-only accessors
// of this class; the returned copy is independent of this object.
| ValueIdSet getJoinedCols() const { return joinedCols_; } |
| |
// Returns the UEC of the joining columns in 'joinedColSet' (per the name).
// NOTE(review): 'joinedColSet' is taken by non-const reference, so it may
// be modified by the call -- confirm against the implementation.
| CostScalar getUecOfJoiningCols(ValueIdSet & joinedColSet) const; |
| |
| // Returns the minimum UEC among the columns of the given column set |
| CostScalar getMinUec(const ValueIdSet & baseColSet) const; |
| |
// NOTE(review): the parameter names (and the comments derived from them)
// of the two declarations below look swapped: getMaxFreqForCaseExpr()
// takes the *leaf values* of a Case expression, yet getMaxUecForCaseExpr()
// is declared with 'baseColSet' and getMaxUec() with 'leafValueSet'.
// Verify against the definitions before relying on either comment.
| // Returns the maximum UEC from the given column set |
| CostScalar getMaxUecForCaseExpr(const ValueIdSet & baseColSet) const; |
| |
| // Returns the maximum UEC from the given leaf value set |
| CostScalar getMaxUec(const ValueIdSet & leafValueSet) const; |
| |
| |
| // Returns the cardinality of the busiest stream, based on this list |
| // of histograms. |
//   partFunc    : partitioning function of the streams
//   numOfParts  : number of partitions
//   grpAttr     : group attributes (use not visible in this header)
//   countOfCPUs : defaults to 1
| |
| CostScalar getCardOfBusiestStream(const PartitioningFunction* partFunc, |
| Lng32 numOfParts, |
| GroupAttributes * grpAttr, |
| Lng32 countOfCPUs = 1); |
| |
// Variant of getCardOfBusiestStream() that additionally takes the set of
// outer nodes. NOTE(review): per the name this is for an operator under a
// nested join -- confirm the exact semantics in the implementation.
//   countOfCpus : defaults to 1
| CostScalar getCardOfBusiestStreamForUnderNJ(CANodeIdSet * outerNodeSet, |
| const PartitioningFunction* pf, |
| Lng32 numOfParts, |
| GroupAttributes * gr, |
| Lng32 countOfCpus = 1); |
| |
// Records recently joined columns for the list entries selected by
// startIdx / stopIdx (per the name).
// NOTE(review): whether stopIdx is inclusive or one-past-the-end is not
// visible here; other routines of this class use an exclusive end.
| void addRecentlyJoinedCols(CollIndex startIdx, |
| CollIndex stopIdx); |
| |
// NOTE(review): per the name, compresses the column statistics of the
// members of this list down to a single interval -- confirm.
| void compressColStatsToSingleInt(); |
| |
// Inserts entries for the statistics in 'other' into this list.
// NOTE(review): presumably columnList and tableColList are parallel and
// matched by position (hence the name) -- confirm in the implementation.
| void insertByPosition(const StatsList & other, |
| const NAColumnArray &columnList, |
| const ValueIdList &tableColList); |
| |
| private: |
| // --------------------------------------------------------------------- |
| // The following five routines are private subroutines of |
| // estimateCardinality() : |
| // |
| // the first three are used to apply different types of predicates to |
| // the ColStatDescList that calls them; |
| // |
| // the fourth tries to use multi-column uec information for |
| // adjusting rowcount estimation in the case of applying multiple |
| // predicates for highly correlated columns within the same table. |
| // |
| // the fifth tries to use multi-column uec information for |
| // multiple-column joins. |
| // --------------------------------------------------------------------- |
| |
| // --------------------------------------------------------------------- |
| // Given a VEG predicate, merge all histograms belonging to the same |
| // equivalence class. |
//   newRowcount, numOuterColStats : non-const references (in/out)
//   mergeMethod    : defaults to INNER_JOIN_MERGE
//   exprOpCode     : defaults to ITM_FIRST_ITEM_OP
//   maxSelectivity : optional, defaults to NULL
// NOTE(review): the meaning of the NABoolean result is not visible here.
| NABoolean applyVEGPred (ItemExpr *VEGpred, |
| CostScalar & newRowcount, |
| CollIndex & numOuterColStats, |
| MergeType mergeMethod = INNER_JOIN_MERGE, |
| OperatorTypeEnum exprOpCode = ITM_FIRST_ITEM_OP, |
| CostScalar *maxSelectivity=NULL); |
| // --------------------------------------------------------------------- |
| // Apply a bi-relational predicate to the set of column statistics. |
//   newRowcount, numOuterColStats : non-const references (in/out)
//   mergeMethod    : defaults to INNER_JOIN_MERGE
//   exprOpCode     : defaults to ITM_FIRST_ITEM_OP
//   maxSelectivity : optional, defaults to NULL
// NOTE(review): the meaning of the NABoolean result is not visible here.
| NABoolean applyPred (ItemExpr *biRelatpred, |
| CostScalar & newRowcount, |
| CollIndex & numOuterColStats, |
| MergeType mergeMethod = INNER_JOIN_MERGE, |
| OperatorTypeEnum exprOpCode = ITM_FIRST_ITEM_OP, |
| CostScalar *maxSelectivity=NULL); |
| |
| // --------------------------------------------------------------------- |
| // Apply a predicate having 'default' selectivity to the given column |
| // statistics. |
//   newRowcount    : non-const reference (in/out)
//   exprOpCode     : defaults to ITM_FIRST_ITEM_OP
//   maxSelectivity : optional, defaults to NULL
| void applyDefaultPred (ItemExpr * pred, CostScalar & newRowcount, |
| OperatorTypeEnum exprOpCode = ITM_FIRST_ITEM_OP, |
| CostScalar *maxSelectivity=NULL); |
| |
| // --------------------------------------------------------------------- |
| // Apply the bi-logic predicates (ITM_OR and ITM_AND) in BiLogicPreds. |
// Most parameters mirror those of estimateCardinality(). In addition:
//   tempRowCount          : non-const reference (in/out)
//   mergeMethod           : required here (no default)
//   biLogicPredReductions : dictionary keyed by ValueId holding CostScalar
//                           values; passed by non-const reference.
//                           NOTE(review): presumably the per-predicate
//                           row-count reductions -- confirm.
//   exprOpCode            : defaults to ITM_FIRST_ITEM_OP
//   maxSelectivity        : optional, defaults to NULL
| |
| CostScalar applyBiLogicPred(CostScalar & tempRowCount, |
| ValueIdSet & BiLogicPreds, |
| const ValueIdSet & outerReferences, |
| const Join * expr, |
| const SelectivityHint * selHint, |
| const CardinalityHint * cardHint, |
| CollIndex & numOuterColStats, |
| ValueIdSet & unresolvedPreds, |
| MergeType mergeMethod, |
| NAHashDictionary<ValueId, CostScalar> & biLogicPredReductions, |
| OperatorTypeEnum exprOpCode = ITM_FIRST_ITEM_OP, |
| CostScalar *maxSelectivity=NULL); |
| |
| // --------------------------------------------------------------------- |
| // Use multi-column uec to find the resulting rowcount from multiple |
| // predicates on correlated columns within a single table, if possible. |
// The result is delivered through 'newRowcount'. The last parameter,
// biLogicPredReductions, is a non-const reference to a dictionary keyed by
// ValueId (its role is not documented in this header -- TODO confirm).
| void useMultiUecIfCorrelatedPreds ( |
| CostScalar & newRowcount, // in/out |
| const CostScalar & oldRowcount, // in |
| CollIndex predCount, // in : quick check : proceed if >=2 |
| const CollIndexList &joinHistograms, // in : histograms used in MC Join |
| CollIndex startIndex, // in : 1st idx of CSDL to look at |
| CollIndex stopIndex, // in : idx of CSDL+1 to look at |
| NAHashDictionary<ValueId, CostScalar> & biLogicPredReductions); |
| |
| // --------------------------------------------------------------------- |
| // Use multi-column uec to find the resulting rowcount from a |
| // multi-column join between two tables, if possible. |
// The result is delivered through 'newRowcount'; 'joinHistograms' is filled
// in as an output. 'expr' and 'mergeMethod' carry no in/out annotation in
// the declaration; both are passed read-only (const pointer / by value).
| void useMultiUecIfMultipleJoins ( |
| CostScalar & newRowcount, /* in/out */ |
| const CostScalar & oldRowcount, /* in */ |
| CollIndex startIndex, /* in : first index of CSDL */ |
| CollIndex stopIndex, /* in : last index of CSDL+1 */ |
| CollIndexList & joinHistograms, /* out */ |
| const Join * expr, |
| MergeType mergeMethod |
| ); |
| |
| // --------------------------------------------------------------------- |
// Computes a row-reduction factor (per the name). The three reference
// parameters predCountSC, predCountMC and rowRedProduct are non-const and
// therefore act as outputs (or in/outs).
// NOTE(review): "SC"/"MC" presumably stand for single-column/multi-column
// predicate counts -- confirm against the implementation.
| void computeRowRedFactor(MergeType mergeMethod, |
| CollIndex numOuterColStats, |
| CostScalar rowcountBeforePreds, |
| CollIndex & predCountSC, |
| CollIndex & predCountMC, |
| CostScalar & rowRedProduct); |
| |
| public: |
| |
// Returns the highest UEC reduction caused by local predicates for the
// columns in 'cols' (per the name).
// NOTE(review): 'cols' is a non-const reference; whether it is modified is
// not visible here.
| CostScalar getHighestUecReductionByLocalPreds(ValueIdSet &cols) const; |
| |
| // Synchronize the RowCount of all the histograms in the list (those |
| // with array indices from 0..loopLimit-1). |
//   baseRowcount : the row count the histograms were originally supposed
//                  to have (contrast with the two-argument overload)
//   newRowcount  : the row count to synchronize to
| void synchronizeStats ( const CostScalar & baseRowcount, |
| const CostScalar & newRowcount, |
| CollIndex loopLimit ) ; |
| |
| // This version doesn't care what the rowcount was originally supposed |
| // to be -- it just does the work of setting all histograms to have |
| // newRowcount as the rowcount. |
// NOTE(review): loopLimit presumably bounds the indices 0..loopLimit-1 as
// in the three-argument overload -- confirm.
| void synchronizeStats ( const CostScalar & newRowcount, |
| CollIndex loopLimit ) ; |
| |
| // Used only by Join::synthEstLogProp to do inner-equi-joins of any |
| // columns appearing as outer references from both children of the |
| // join. |
// NOTE(review): the CostScalar result is presumably the resulting row
// count -- confirm against the implementation.
| CostScalar mergeListPairwise() ; |
| |
| // Used for mapping a histogram to a range-partitioned table so that we |
| // can determine which partitions in a query are active. |
| // -- returns TRUE if everything's OK, FALSE otherwise |
// The first three parameters are inputs (partitioning keys, their orders
// and the partition boundaries); the remaining four are outputs, as marked
// on each parameter. The method is const: this list is not modified.
| NABoolean divideHistogramAtPartitionBoundaries |
| (const ValueIdList & listOfPartKeys, /*in*/ |
| const ValueIdList & listOfPartKeyOrders,/*in*/ |
| const LIST(EncodedValueList *) & listOfPartBounds, /*in*/ |
| ValueId & keyCorrespondingToOutputRows, /*out*/ |
| NABoolean & isKeyAscending, /*out*/ |
| ColStats & outputRows, /*out*/ |
| CollIndexList & outputFactors) const ; /*out*/ |
| |
| |
| // ------------------------------------------------------------------------ |
| // methods to access individual CSD's within the CSDL via various |
| // indexing methods (e.g., via ValueId, ...) |
| // |
| // Currently we have six of these : |
| // . getColStatDescIndexForColumn() |
| // . getColStatDescIndex() |
| // . getColStatsPtrForColumn() |
| // . getColStatsPtrForPredicate() |
| // . getColStatsPtrForVEGGroup() |
| // . getSingleColStatsForVEGPred() |
| // ------------------------------------------------------------------------ |
| |
// Looks up the ColStatDesc for 'column' and delivers its list position
// through the output parameter 'index'.
// NOTE(review): the NABoolean result presumably reports whether an entry
// was found, and 'index' is presumably meaningful only in that case.
| NABoolean getColStatDescIndexForColumn (CollIndex& index, /* output */ |
| const ValueId& column) const ; |
| |
// Overload taking an additional partitioning-key column array.
// NOTE(review): partKeyColArray is a non-const reference; how it is used
// or updated is not visible here -- confirm.
| NABoolean getColStatDescIndexForColumn (CollIndex& index, /* output */ |
| const ValueId& column, |
| NAColumnArray& partKeyColArray) const ; |
| |
| // Get the index of the ColStatDesc which has the given |
| // ValueId as a VEGColumn. Does not assume that 'value' |
| // must be a BASECOL, VEGREF, etc. |
| // The index is delivered through the output parameter 'index'. |
| NABoolean getColStatDescIndex (CollIndex& index, /* output */ |
| const ValueId& value) const ; |
| |
| // This is used in scan costing: |
| // Returns the ColStats corresponding to the given column. |
| // This should be used to find the single-column histogram |
| // associated with a column. It returns NULL if the |
| // histogram does not exist. |
| ColStatsSharedPtr getColStatsPtrForColumn (const ValueId& column) const ; |
| |
// Returns the ColStats associated with the given predicate (per the name).
// NOTE(review): presumably NULL when nothing matches, as documented for
// getColStatsPtrForColumn() -- confirm.
| ColStatsSharedPtr getColStatsPtrForPredicate (const ValueId& predicate) const ; |
| |
// Returns the ColStats associated with the given VEG group, i.e. a set of
// ValueIds (per the name). NOTE(review): NULL-on-miss behaviour as above
// is assumed, not verified.
| ColStatsSharedPtr getColStatsPtrForVEGGroup (const ValueIdSet& VEGGroup) const ; |
| |
| // This is used in join costing: |
| // Given one VEG predicate, retrieves the first single-column ColStats |
| // that has a column in the VEG predicate. |
| ColStatsSharedPtr getSingleColStatsForVEGPred (const ValueId& VEGPred) const ; |
| |
| |
| // ------------------------------------------------------------------------ |
| // internal consistency checking |
| // ------------------------------------------------------------------------ |
| |
| // Verify that the CSDL's internal semantics are being maintained! |
| // --> loops over array indices startIndex..endIndex-1 (endIndex is exclusive) |
// The method is const: it only checks, it does not repair (see
// enforceInternalConsistency() for the repairing variant).
| void verifyInternalConsistency (CollIndex startIndex, CollIndex endIndex) const ; |
| |
| // Enforce the CSDL's internal semantics if they're not being maintained! |
| // --> loops over array indices startIndex..endIndex-1 (endIndex is exclusive) |
| // --> if "printNoStatsWarning" is TRUE, then print the 6008's re: no stats |
| // when there should be; the parameter defaults to FALSE |
| void enforceInternalConsistency (CollIndex startIndex, |
| CollIndex endIndex, |
| NABoolean printNoStatsWarning = FALSE) ; |
| |
| |
| // ------------------------------------------------------------------------ |
| // looking at aggregate attributes over all ColStatDesc's in the CSDL |
| // ------------------------------------------------------------------------ |
| |
| // Returns (by value) the set of all predicates applied to the members of this list |
| ValueIdSet appliedPreds () const; |
| |
| // Returns (by value) the set of all VEGColumns of the members of this list |
| ValueIdSet VEGColumns () const; |
| |
| |
| // ------------------------------------------------------------------------ |
| // displaying debugging information |
| // ------------------------------------------------------------------------ |
| |
| // Display each ColStatDesc in the list. |
//   selectListCols : select-list columns (use not visible in this header)
//   f              : output stream, defaults to stdout
//   prefix/suffix  : default to DEFAULT_INDENT and ""
//   c, buf         : optional heap and buffer, both default to NULL;
//                    showQueryStats() passes them explicitly
//   hideDetail     : defaults to FALSE
| void print (ValueIdList selectListCols, |
| FILE *f = stdout, |
| const char * prefix = DEFAULT_INDENT, |
| const char * suffix = "", |
| CollHeap *c=NULL, char *buf=NULL, |
| NABoolean hideDetail=FALSE) const ; |
// Argument-less display routine.
// NOTE(review): presumably a debugger-friendly wrapper around print() --
// confirm against the implementation.
| void display () const ; |
| |
| // Called from RelExpr::showQueryStats(): prints this list to stdout with |
| // the default indent and an empty suffix, with detail hidden. |
| void showQueryStats(CollHeap *c, char *buf, ValueIdList selectListCols) const |
| { |
| const NABoolean hideDetail = TRUE; |
| print(selectListCols, stdout, DEFAULT_INDENT, "", c, buf, hideDetail); |
| } |
| |
| // A utility routine used by mergeStats and applyDefaultPred. |
// rootStatIndex and statsToMerge are non-const references, i.e. outputs
// (or in/outs) of the call.
// NOTE(review): the meaning of the NABoolean result is not visible here.
| NABoolean identifyMergeCandidates(ItemExpr * VEGpred, |
| CollIndex & rootStatIndex, |
| CollIndexList & statsToMerge) const ; |
| |
| // A helper method used while computing left joins. The method locates |
| // histograms that have been joined to the right child using inner join and |
| // now need to be null augmented to simulate the left join. |
// statsToMerge and rootStatIndex are non-const references, i.e. outputs
// (or in/outs). NOTE(review): the meaning of the NABoolean result and of
// outerRefCount is not visible in this header.
| // --------------------------------------------------------------------------- |
| NABoolean |
| locateHistogramToNULLAugment(ValueIdSet EqLocalPreds, |
| NAList<CollIndex> &statsToMerge, |
| CollIndex &rootStatIndex, |
| CollIndex outerRefCount); |
| |
| // -------------------------------------------------------------------- |
| // A helper method used by left joins. It merges the rows |
| // from the left side that did not match the right child back into |
| // the joined histograms. |
// NOTE(review): the CostScalar result is presumably the resulting row
// count ("RC") -- confirm against the implementation.
| // ------------------------------------------------------------------- |
| CostScalar |
| computeLeftOuterJoinRC(NABoolean &foundFlag /*in and out*/, |
| const ColStatDescList &leftColStatsList, |
| CollIndex rootStatIndex); |
| |
| // -------------------------------------------------------------------- |
| // A helper method used by full outer joins. It merges the rows |
| // from the right side that did not match the left child back into |
| // the joined histograms. |
// NOTE(review): the CostScalar result is presumably the resulting row
// count ("RC") -- confirm against the implementation.
| // ------------------------------------------------------------------- |
| CostScalar |
| computeFullOuterJoinRC(NABoolean &foundFlag /*in and out*/, |
| const ColStatDescList &origColStatsList, |
| CollIndex rootStatIndex); |
| |
| // ---------------------------------------------------------------------------------- |
| // This is a helper method used by left joins. |
| // The method is called after the rows from the left histograms of the joining column |
| // that did not match the right side are merged back into the join result. |
| // In the following method, the histograms from the remaining columns are synchronized |
| // to have the same row count. |
// oJoinResultRows and baseRows are non-const references; whether they are
// updated by the call is not visible in this header.
| // -------------------------------------------------------------------------------- |
| void |
| synchronizeHistsWithOJRC(NAList<CollIndex> &statsToMerge, |
| CollIndex startIndex, |
| CollIndex stopIndex, |
| CollIndex rootStatIndex, |
| const ColStatDescList &leftColStatsList, |
| CostScalar &oJoinResultRows, |
| CostScalar &baseRows); |
| |
| // ----------------------------------------------------------------------- |
| // This is a helper method for left joins. It is used to NULL-instantiate |
| // the right histogram rows, i.e. to pad rows from the other side with NULLs. |
// NOTE(review): startIndex/stopIndex presumably select the list entries to
// process and nulledVIds presumably names the null-instantiated values --
// confirm against the implementation.
| // ------------------------------------------------------------------------ |
| void |
| nullInstantiateHists(CollIndex startIndex, |
| CollIndex stopIndex, |
| CostScalar &oJoinResultRows, |
| ValueIdList &nulledVIds); |
| |
// Returns the multi-column skewed value list for the columns in 'cols'
// (per the name); 'colGroup' is a non-const reference and so acts as an
// output. NOTE(review): presumably NULL is returned when no list exists.
| const MCSkewedValueList * getMCSkewedValueListForCols(ValueIdSet cols, ValueIdList &colGroup); |
// Returns the average row count for the multi-column values that are not
// in the given skewed value list (per the name) -- TODO confirm.
| CostScalar getAvgRowcountForNonSkewedMCValues(ValueIdSet cols, MCSkewedValueList* mCSkewedValueList); |
| |
| private: |
| // A utility routine used by applyPred; private overload that takes a |
// single operand and has no root-index output. statsToMerge is a non-const
// reference, i.e. an output (or in/out).
| NABoolean identifyMergeCandidates(ItemExpr * operand, |
| CollIndexList & statsToMerge) const ; |
| |
| // A utility routine for merging a specified set of members in the given |
| // ColStatDescList. |
//   statsToMerge : list indices of the members to merge
//   rootIndex    : list index of the root member of the merge
//   newRowcount, newUec : non-const references, i.e. outputs (or in/outs)
//   forVEGPred   : NOTE(review): presumably TRUE when called for a VEG
//                  predicate -- confirm
//   opType       : defaults to ITM_FIRST_ITEM_OP
| void mergeSpecifiedStatDescs (const CollIndexList & statsToMerge, |
| CollIndex rootIndex, |
| MergeType mergeMethod, |
| CollIndex numOuterColStats, |
| CostScalar & newRowcount, |
| CostScalar & newUec, |
| NABoolean forVEGPred, |
| OperatorTypeEnum opType = ITM_FIRST_ITEM_OP) ; |
| |
| // Prior to any Join's predicate analysis, a ColStatDescList is built |
| // that contains the cross-product of the left and right tables. |
| // |
| // Knowledge of that cross-producting is incorporated in formulas that |
| // evaluate VEG and Equality predicates. |
| |
| |
| // We maintain multi-column uec information in order to accurately |
| // estimate rowcounts for joins involving multiple predicates, the |
| // output of a groupby aggregate, and possibly other cases I'm not |
| // thinking of right now. This data member should be used completely |
| // read-only; that is, only one such list exists for all |
| // ColStatDescList's, and no CSDL has the rights to modify it. |
// NOTE(review): insertIntoUecList() calls uecList_->insertList(), which
// modifies the shared list despite the read-only intent stated above.
| |
| MultiColumnUecList * uecList_ ; |
| |
// Multi-column skewed value lists. Held as a plain pointer that is set
// through setMCSkewedValueLists() and copied (not cloned) by operator=.
// NOTE(review): ownership/lifetime is not visible in this header.
| MultiColumnSkewedValueLists * mcSkewedValueLists_; |
| |
| // We also maintain a flag to indicate that we could have used multi-column |
| // information, but as it is unavailable, we shall put a cap on the lower |
| // bound of the join cardinality to ensure that we do not underestimate. |
| |
| NABoolean useCapForLowBound_; |
| |
| // The following flag specifies if the optimizer should use frequency of |
| // joining columns to uplift join cardinality. The flag is set only if the |
| // join is being done on one column. |
| |
| NABoolean joinOnSingleCol_; |
| |
| // We want to cache the totalRowCount after applying local predicates |
| // on a table, without using hints. This will be used in computing the |
| // cardinality for an index when some cardinality or selectivity hint |
| // is given by the user. |
| |
| CostScalar scanRowCountWithoutHint_; |
| |
// Columns accumulated through addToJoinedCols() and reset by
// clearJoinedCols().
| ValueIdSet joinedCols_; |
| |
| // the following are methods for accessing ColStatDescList data members |
| public: |
// Points this list at 'list'. A NULL argument is ignored, so an already
// installed list is never reset to NULL through this setter. Const-ness is
// cast away because the member is stored as a non-const pointer.
| inline void setUecList (const MultiColumnUecList * list) |
| { |
| if ( list == NULL ) |
| return ; |
| uecList_ = const_cast<MultiColumnUecList*>(list) ; |
| } |
| |
// Adds the contents of 'other' to the multi-column uec list: when a list
// is already installed, 'other' is inserted into it; otherwise 'other'
// itself is adopted via setUecList().
| inline void insertIntoUecList (const MultiColumnUecList * other) |
| { |
| if ( uecList_ != NULL ) |
| { |
| uecList_->insertList (other) ; |
| return ; |
| } |
| setUecList (other) ; |
| } |
| |
// Read-only access to the multi-column uec list (may be NULL).
| inline const MultiColumnUecList * getUecList() const |
| { |
| return this->uecList_ ; |
| } |
| |
// Mutable access to the multi-column uec list (may be NULL).
| inline MultiColumnUecList * uecList() |
| { |
| return this->uecList_ ; |
| } |
| |
// Points this list at 'list'. A NULL argument is ignored, so an already
// installed pointer is never reset to NULL through this setter.
| inline void setMCSkewedValueLists (const MultiColumnSkewedValueLists* list) |
| { |
| if ( list == NULL ) |
| return ; |
| mcSkewedValueLists_ = const_cast<MultiColumnSkewedValueLists*>(list) ; |
| } |
| |
// Read-only access to the multi-column skewed value lists (may be NULL).
| inline const MultiColumnSkewedValueLists * getMCSkewedValueLists() const |
| { |
| return this->mcSkewedValueLists_ ; |
| } |
| |
// Mutable access to the multi-column skewed value lists (may be NULL).
| inline MultiColumnSkewedValueLists * mcSkewedValueLists() |
| { |
| return this->mcSkewedValueLists_ ; |
| } |
| |
// Reports whether the lower bound of the join cardinality is to be capped.
| inline NABoolean isCapForLowBound () const |
| { |
| return this->useCapForLowBound_; |
| } |
| |
// Sets the cap-for-low-bound flag; calling without an argument sets it.
| inline void setCapForLowBound (NABoolean flag=TRUE) |
| { |
| this->useCapForLowBound_ = flag; |
| } |
| |
// Reports whether the join is flagged as being done on a single column.
| inline NABoolean isJoinOnSingleCol () const |
| { |
| return this->joinOnSingleCol_; |
| } |
| |
// Sets the join-on-single-column flag; calling without an argument sets it.
| inline void setJoinOnSingleCol (NABoolean flag = TRUE) |
| { |
| this->joinOnSingleCol_ = flag; |
| } |
| |
// Sets the input cardinality to 'rows' (per the name).
// NOTE(review): which members are affected is not visible here; unlike the
// neighbouring accessors this one is defined out of line.
| void setInputCard (CostScalar rows); |
| |
// Returns the cached scan row count that was computed without hints.
| inline CostScalar getScanRowCountWithoutHint () const |
| { |
| return this->scanRowCountWithoutHint_; |
| } |
| |
// Caches the scan row count that was computed without hints.
| inline void setScanRowCountWithoutHint (CostScalar scanRowCountWithoutHint) |
| { |
| this->scanRowCountWithoutHint_ = scanRowCountWithoutHint; |
| } |
| }; |
| |
| |
| #endif /* COLSTATDESC_H */ |