| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| #ifndef _SCAN_OPTIMIZER_H |
| #define _SCAN_OPTIMIZER_H |
| /* -*-C++-*- |
| ***************************************************************************** |
| * |
| * File: ScanOptimizer.h |
| * RCS: |
| * Description: Compute the cost and generate the appropiate key for the |
| * the scan. |
| * Code location: ScanOptimizer.C |
| * |
| * Created: //96 |
| * Language: C++ |
| * |
| * Purpose: Simple Cost Vector Reduction changes to class |
| * FileScanBasicCost |
| * |
| |
| * |
| * |
| * |
| ***************************************************************************** |
| */ |
| // ----------------------------------------------------------------------- |
| |
| #include "GroupAttr.h" |
| #include "RelExpr.h" |
| #include "RelScan.h" |
| #include "disjuncts.h" |
| #include "Cost.h" |
| #include "CostScalar.h" |
| #include "opt.h" |
| #include "NABasicObject.h" |
| #include "ColStatDesc.h" |
| #include "Stats.h" |
| |
| // ----------------------------------------------------------------------- |
| // forward declarations |
| // ----------------------------------------------------------------------- |
| |
| class FileScanBasicCost; |
| class FileScanCostList; |
| |
| void |
| removeConstantsFromTargetSortKey(ValueIdList* targetSortKey, |
| ValueIdMap* map) ; |
| NABoolean |
| isOrderedNJFeasible(ValueIdList leftKeys, ValueIdList rightKeys) ; |
| |
| NABoolean |
| ordersMatch(const InputPhysicalProperty* ipp, |
| const IndexDesc* indexDesc, |
| const ValueIdList* innerOrder, |
| const ValueIdSet& charInputs, |
| NABoolean partiallyInOrderOK, |
| NABoolean& probesForceSynchronousAccess, |
| |
| // if TRUE, do not assert, just return FALSE. |
| NABoolean noCmpAssert = FALSE); |
| |
| // getDp2CacheSizeInBlocks() |
| // Given a block size, this method returns the number of blocks in |
| // cache for blocks of that size. |
| // |
| CostScalar getDP2CacheSizeInBlocks(const CostScalar& blockSizeInKb); |
| |
| // ----------------------------------------------------------------------- |
| // class Histograms facilitate the deep copy and use of ColStatDescList's |
| // ----------------------------------------------------------------------- |
| class Histograms : public NABasicObject { |
| public: |
| |
| Histograms(CollHeap* heap):colStatDescList_(heap) {} |
| Histograms():colStatDescList_(CmpCommon::statementHeap()) {} |
| Histograms(const ColStatDescList& colStatDescList); |
| |
| virtual ~Histograms(); |
| |
| // ----------------------------------------------------------------------- |
| // --- Accessors: |
| // ----------------------------------------------------------------------- |
| // get the number of histograms for this table: |
| CollIndex entries() const |
| { return colStatDescList_.entries(); } |
| |
| // get the ColStatDesc in the i-th position: |
| const ColStatDesc& operator[](CollIndex i) const |
| { return *(colStatDescList_[i]); } |
| |
| // Check whether a ColStatDesc for a given column already exists: |
| NABoolean contains(ValueId & column) const |
| { return colStatDescList_.contains(column); } |
| |
| |
| // get the number of total rows in the table after |
| // predicates were applied: |
| CostScalar getRowCount() const; |
| |
| |
| NABoolean getColStatDescForColumn(CollIndex index, /* out */ |
| const ValueId& column) const; |
| |
| const ColStats& getColStatsForColumn(const ValueId& column) const; |
| // $$$ this next method is to work around the fact |
| // $$$ that prototype hist. code do not support fake histograms |
| ColStatsSharedPtr getColStatsPtrForColumn(const ValueId& column) const; |
| // get multiColUecCount for set of columns, would work even for single |
| // columns as |
| CostScalar getUecCountForColumns(const ValueIdSet& columns) const; |
| // ----------------------------------------------------------------------- |
| // This method returns the ColStatsSharedPtr for the ColStats that references |
| // the given predicate if it exists, otherwise it returns NULL |
| // ----------------------------------------------------------------------- |
| ColStatsSharedPtr getColStatsPtrForPredicate(const ValueId& predicate) const |
| { return colStatDescList_.getColStatsPtrForPredicate(predicate); } |
| |
| // Returns TRUE if at least one of the histograms is a |
| // fake histogram. A fake histogram is a histogram that |
| // was synthesized by ColStats from info. other than statistics. |
| // We need to test for this because we don't want to cost |
| // MDAM relying on fake histograms. MDAM must not be chosen |
| // when there are fake histograms. |
| NABoolean containsAtLeastOneFake() const; |
| |
| |
| // $$$ In theory, there should always be at least |
| // on ColStats for every column of a base |
| // table, even if no statistics has been collected, |
| // However, as of today (04/11/97), this is not true. |
| // Therefore, to avoid braking the regressions |
| // I will test for this case using the function below. |
| // When this situation is corrected, this function should |
| // always return FALSE. |
| NABoolean isEmpty() const |
| { return colStatDescList_.isEmpty(); } |
| |
| |
| // ----------------------------------------------------------------------- |
| // Mutators |
| // ----------------------------------------------------------------------- |
| void append(const ColStatDescSharedPtr& colStatDesc); |
| |
| // we need some way of propagating the multi-column uec list |
| inline void setCSDLUecList (const MultiColumnUecList * other) |
| { |
| colStatDescList_.insertIntoUecList (other) ; |
| } |
| |
| |
| // ----------------------------------------------------------------------- |
| // isAnIndexJoin returns TRUE when this histogram and |
| // the histogram in the inputEstLogProp represent an index join, |
| // it returns true otherwise. The optional parameter |
| // innerColumnListToRemovePtr must be a properly allocated, |
| // empty, ValueIdListPtr when non-null. If non-null, it returns |
| // the indices of those histograms in this that are also |
| // in the inputEstLogProp (i.e. they refer to index columns) |
| // ----------------------------------------------------------------------- |
| NABoolean |
| isAnIndexJoin(const EstLogProp& inputEstLogProp |
| ,LIST(CollIndex) *innerIndexColumnListPtr=NULL) const; |
| |
| // ----------------------------------------------------------------------- |
| // Utility functions |
| // ----------------------------------------------------------------------- |
| |
| void displayHistogramForColumn(const ValueId& column) const; |
| |
| void display() const; |
| |
| void print (FILE *f = stdout, |
| const char * prefix = DEFAULT_INDENT, |
| const char * suffix = "") const; |
| |
| |
| |
| //---- Mutators: |
| |
| // Apply a set of predicates to this histogram |
| void applyPredicates(const ValueIdSet& predicates, |
| const RelExpr & scan, |
| const SelectivityHint * selHint = NULL, |
| const CardinalityHint * cardHint = NULL, |
| OperatorTypeEnum opType = ITM_FIRST_ITEM_OP); |
| // Apply a single predicate to this histogram |
| void applyPredicate(const ValueId& predicate, |
| const RelExpr & scan, |
| const SelectivityHint * selHint = NULL, |
| const CardinalityHint * cardHint = NULL, |
| OperatorTypeEnum opType = ITM_FIRST_ITEM_OP); |
| |
| // ----------------------------------------------------------------------- |
| // Use isMDAM = FALSE when using this in single subset |
| // Use isMDAM = TRUE when using this in MDAM. In this case |
| // the routine will assume that we are not in an |
| // index join (it should be the case because |
| // code in the early stages should make sure that |
| // we don't attempt MDAM in an index join). This is |
| // not a correctness issue, it is an efficiency issue. |
| // ----------------------------------------------------------------------- |
| void |
| applyPredicatesWhenMultipleProbes( |
| const ValueIdSet& predicates |
| ,const EstLogProp& inputEstLogProp |
| ,const ValueIdSet& inputValues |
| ,const NABoolean isMDAM |
| ,const SelectivityHint * selHint=NULL |
| ,const CardinalityHint * cardHint=NULL |
| ,NABoolean * isAnIndexJoin=NULL |
| ,OperatorTypeEnum opType = ITM_FIRST_ITEM_OP); |
| |
| |
| |
| const ColStatDescList& getColStatDescList() const; |
| |
| protected: |
| |
| |
| ColStatDescList colStatDescList_; |
| |
| |
| }; // Histograms |
| |
| |
| // ----------------------------------------------------------------------- |
| // Class IndexDescHistograms |
| // Encapsulates histogram handling for scan costing of key sequenced files |
| // ----------------------------------------------------------------------- |
| class IndexDescHistograms : public Histograms { |
| public: |
| // The constructor creates a ColStatDescList |
| // from the IndexDesc raw histograms. It will contain |
| // columnPosition entries; one entry for each column in |
| // all positions <= columnPosition. |
| // 1 <= columnPosition <= number of key columns in indexDesc key |
| IndexDescHistograms(const IndexDesc& indexDesc, |
| const CollIndex columnPosition); |
| |
| // ----------------------------------------------------------------------- |
| // Accessors: |
| // ----------------------------------------------------------------------- |
| |
| const IndexDesc& getIndexDesc() const |
| { return indexDesc_; } |
| |
| |
| NABoolean isMultiColUecInfoAvail()const; |
| |
| // ----------------------------------------------------------------------- |
| // Mutators: |
| // ----------------------------------------------------------------------- |
| |
| |
| CostScalar |
| computeFailedProbes(const Histograms& outerHistograms |
| ,const ValueIdSet& keyPreds |
| ,const ValueIdSet& inputValues |
| ,const ValueIdSet& operatorValues |
| ) const; |
| |
| // Add a histogram to the colStatDescList. This also |
| // sinchronizes the histogram we are adding with the |
| // existing histograms in IndexDescHistograms |
| void appendHistogramForColumnPosition(const CollIndex& columnPosition); |
| |
| //------------------------------------------------------------------------- |
| //This function is used to get a better estimate of how many uec's that need |
| //to be skipped by MDAM for a specific column. It can compute this number |
| //if there is multi column uec information for the columns up to the |
| //column under consideration. Then it needs uec/multi-col uec information of |
| //some preceding columns. |
| //Example : Columns a, b, c, d . We are trying to compute the estimated uec |
| //for d. Answer: |
| // can be multicolUec for ((a and/or b and/or c) and d) |
| // ---------------------------- |
| // multicolUec for a and/or b and/or c(best we can do is a,b,c) |
| // |
| //if the denominator is a,b,c then the numerator must be a,b,c,d |
| // we must have comparable sets as numerator and denominator |
| //Input: columnOrderList which has the columnIdList for the index/table |
| // indexOfcolum in the order list that we have to compute uec info for |
| //Output: estimateduec for the column and true as return value |
| // if there isn't enough information then we return false. |
| //-------------------------------------------------------------------------- |
| NABoolean estimateUecUsingMultiColUec( |
| const ColumnOrderList& keyPredsByCol,/*in*/ |
| const CollIndex& indexOfColumn,/*in*/ |
| CostScalar& estimatedUec/*out*/); |
| |
| private: |
| const IndexDesc& indexDesc_; |
| |
| |
| }; // IndexDescHistograms |
| |
| |
| |
| // ----------------------------------------------------------------------- |
| // Class ScanOptimizer |
| // This is a helper class that performs the following tasks: |
| // 1.- Generates the different possible keys for a Scan |
| // 2.- Estimates the cost for every key |
| // 3.- Chooses the cheapest key and deletes all others |
| // 4.- Returns the cheapest key and its cost |
| // ----------------------------------------------------------------------- |
| class MdamTrace; |
| class ScanOptimizer : public NABasicObject // Abstract class |
| { |
| friend class MdamTrace; |
| public: |
| enum ncmRowSizeFactorType {TUPLES_ROWSIZE_FACTOR = 0, |
| SEQ_IO_ROWSIZE_FACTOR, |
| RAND_IO_ROWSIZE_FACTOR}; |
| ScanOptimizer(const FileScan& associatedFileScan |
| ,const CostScalar& resultSetCardinality |
| ,const Context& myContext |
| ,const ValueIdSet& externalInputs); |
| |
| virtual ~ScanOptimizer(); |
| |
| // ----------------------------------------------------------------------- |
| // Accessors |
| // ----------------------------------------------------------------------- |
| |
| virtual CollIndex getNumActivePartitions() const; |
| virtual CollIndex getEstNumActivePartitionsAtRuntime() const; |
| virtual CollIndex getNumActiveDP2Volumes() const; |
| virtual CollIndex getEstNumActivePartitionsAtRuntimeForHbaseRegions() const; |
| |
| Lng32 getNumberOfBlocksToReadPerAccess() const |
| { |
| // make sure that value has been initialized before using it: |
| CMPASSERT(numberOfBlocksToReadPerAccess_ > -1); |
| return numberOfBlocksToReadPerAccess_; |
| } |
| |
| const CostScalar getEstRowsAccessed() const |
| { |
| return estRowsAccessed_; |
| } |
| |
| virtual Cost * optimize(SearchKey *& searchKeyPtr // out |
| ,MdamKey *&mdamKeyPtr // out |
| ) = 0; |
| |
| // Return the appropriate Scan Optimizer to use for the given scan |
| // with the given context. |
| // |
| static ScanOptimizer * |
| getScanOptimizer(const FileScan& associatedFileScan |
| ,const CostScalar& resultSetCardinality |
| ,const Context& myContext |
| ,const ValueIdSet &externalInputs |
| ,CollHeap* heap = CmpCommon::statementHeap()); |
| |
| // For use of getScanOptimizer(). Made public for testing purposes. |
| // |
| static NABoolean |
| useSimpleFileScanOptimizer(const FileScan& associatedFileScan |
| ,const Context& myContext |
| ,const ValueIdSet &externalInputs); |
| |
| static NABoolean |
| canStillConsiderMDAM(const ValueIdSet partKeyPreds, |
| const ValueIdSet nonKeyColumnSet, |
| const Disjuncts &curDisjuncts, |
| const IndexDesc * indexDesc, |
| const ValueIdSet externalInputs); |
| |
| // get and set various probing counters for all partitions. |
| |
| // the total number of probes |
| const CostScalar getProbes() const { return probes_; }; |
| |
| // the number of probes returning data |
| const CostScalar getSuccessfulProbes() const { return successfulProbes_; }; |
| |
| // the number of probes that are unique (returning one row each) |
| const CostScalar getUniqueProbes() const { return uniqueProbes_; }; |
| |
| // the number of succssful probes returning more than one row |
| const CostScalar getDuplicateSuccProbes() const |
| { return duplicateSuccProbes_; }; |
| |
| // the number of tuples processed |
| const CostScalar getTuplesProcessed() const { return tuplesProcessed_; }; |
| |
| void setProbes(CostScalar x) { probes_ = x; }; |
| void setSuccessfulProbes(CostScalar x) { successfulProbes_ = x; }; |
| void setUniqueProbes(CostScalar x) { uniqueProbes_ = x; }; |
| void setDuplicateSuccProbes(CostScalar x) { duplicateSuccProbes_ = x; }; |
| void setTuplesProcessed(CostScalar x) { tuplesProcessed_ = x; }; |
| |
| protected: |
| |
| // get the pointer to the object with reusable simple cost vectors |
| // or a new object if sharable not found or list is empty. SP 09/18/00 |
| FileScanBasicCost* shareBasicCost(NABoolean &sharedCostFound); |
| |
| // ----------------------------------------------------------------------- |
| // This method computes the cost object out of the first row |
| // and last row cost vectors. It also factors in the effect |
| // of synchronous access, something that will be probably best |
| // done inside the Cost constructor |
| // ----------------------------------------------------------------------- |
| Cost* computeCostObject(const SimpleCostVector& firstRow |
| ,const SimpleCostVector& lastRow |
| ) const; |
| |
| |
| // Wrapper for SCM Cost constructor, used by SCM only. |
| Cost * scmCost( CostScalar tuplesProcessed, |
| CostScalar tuplesProduced, |
| CostScalar tuplesSent, |
| CostScalar ioRand, |
| CostScalar ioSeq, |
| CostScalar noOfProbes, |
| CostScalar input1RowSize, |
| CostScalar input2RowSize, |
| CostScalar outputRowSize, |
| CostScalar probeRowSize); |
| |
| CostScalar scmRowSizeFactor( CostScalar rowSize , |
| ncmRowSizeFactorType rowSizeFactoryType = TUPLES_ROWSIZE_FACTOR); |
| |
| // ----------------------------------------------------------------------- |
| // Use isMDAM = FALSE when using this in single subset |
| // Use isMDAM = TRUE when using this in MDAM. In this case |
| // the routine will assume that we are not in an |
| // index join (it should be the case because |
| // code in the early stages should make sure that |
| // we don't attempt MDAM in an index join). This is |
| // not a correctness issue, it is an efficiency issue. |
| // ----------------------------------------------------------------------- |
| virtual void |
| categorizeProbes(CostScalar& successfulProbes /* out */ |
| ,CostScalar& uniqueSuccProbes /* out */ |
| ,CostScalar& duplicateSuccProbes /* out */ |
| ,CostScalar& failedProbes /* out */ |
| ,CostScalar& uniqueFailedProbes |
| ,const CostScalar& probes |
| ,const ValueIdSet& preds |
| ,const Histograms& outerHistograms |
| ,const NABoolean isMDAM |
| ,CostScalar * dataRows = NULL |
| ) const; |
| |
| // Accesors: |
| |
| const CostScalar & getSingleSubsetSize() const |
| { |
| return singleSubsetSize_; |
| } |
| |
| const CostScalar getResultSetCardinality() const |
| { |
| return resultSetCardinality_; |
| } |
| |
| const CostScalar getIndexLevelsSeeks() const; |
| |
| |
| NABoolean getInOrderProbesFlag() const |
| { return inOrderProbes_; } |
| |
| NABoolean getProbesForceSynchronousAccessFlag() const |
| { return probesForceSynchronousAccess_; } |
| |
| const Context& getContext() const |
| { |
| return context_; |
| } |
| |
| // Mutators: |
| |
| void setSingleSubsetSize(const CostScalar & singleSubsetSize) |
| { |
| singleSubsetSize_ = singleSubsetSize; |
| } |
| |
| void setInOrderProbesFlag(NABoolean probesAreInOrder) |
| { inOrderProbes_ = probesAreInOrder; } |
| |
| void |
| setProbesForceSynchronousAccessFlag(NABoolean probesForceSynchronousAccess) |
| { probesForceSynchronousAccess_ = probesForceSynchronousAccess; } |
| |
| void setNumberOfBlocksToReadPerAccess(const Lng32& blocks) |
| { |
| DCMPASSERT(blocks > -1); |
| numberOfBlocksToReadPerAccess_ = blocks; |
| } |
| |
| void setEstRowsAccessed(CostScalar rows) |
| { |
| estRowsAccessed_ = rows; |
| } |
| |
| // With overflow checks |
| void setNumberOfBlocksToReadPerAccess(const CostScalar& blocks); |
| |
| |
| const RelExpr& getRelExpr() const |
| { |
| return fileScan_; |
| } |
| |
| const FileScan& getFileScan() const |
| { |
| return fileScan_; |
| } |
| |
| const IndexDesc* getIndexDesc() const |
| { |
| return fileScan_.getIndexDesc(); |
| } |
| |
| NABoolean isForwardScan() const |
| { |
| return (NOT fileScan_.getReverseScan()); |
| } |
| |
| NABoolean getMdamFlag() const |
| { |
| return fileScan_.getMdamFlag(); |
| } |
| |
| const Disjuncts& getDisjuncts() const |
| { |
| return fileScan_.getDisjuncts(); |
| } |
| |
| NABoolean isMdamForced() const |
| { |
| return isMdamForced(fileScan_, getContext()); |
| } |
| |
| // Static version, used by useSimpleFileScanOptimizer() |
| // |
| static NABoolean isMdamForced(const FileScan& fileScan |
| ,const Context& myContext); |
| |
| // Static method, used by useSimpleFileScanOptimizer() |
| // Determine whether MDAM is Forced ON, Forced OFF, or ENABLED. |
| // |
| ScanForceWildCard::scanOptionEnum |
| static getMdamStatus(const FileScan& fileScan |
| ,const Context& myContext); |
| |
| NABoolean isMdamEnabled() const; |
| |
| const ValueIdSet &getExternalInputs() const { return externalInputs_; } |
| |
| #ifndef NDEBUG |
| // for printing debug info: |
| |
| void |
| printCostObject(const Cost * costPtr) const; |
| #endif |
| |
| protected: |
| // For scans where we cost both single subset and MDAM scans, |
| // this is the number of rows in a single subset scan (before |
| // executor predicates are applied). |
| // |
| // TODO: Figure out generalizations for MultiProbe Scans |
| // |
| CostScalar singleSubsetSize_; |
| |
| // This is the total number of probes for all active partitions. |
| // The value is cached in categorizeMultiProbes(). |
| // |
| // For MultiProbe Scans |
| // |
| CostScalar probes_; |
| |
| // This is the number of probes (probes_) that produce some data. |
| // The value is cached in categorizeMultiProbes(). |
| // |
| // For MultiProbe Scans |
| // |
| CostScalar successfulProbes_; |
| |
| // This is the number of distinct probes (probes_). Includes |
| // successful and failed probes. The value is cached in |
| // categorizeMultiProbes(). |
| // |
| // For MultiProbe Scans |
| // |
| CostScalar uniqueProbes_; |
| |
| // This is the number of successful probes (successfulProbes_) that |
| // are not unique. duplicateSuccProbes = successfulProbes - |
| // uniqueSuccProbes. The value is cached in |
| // categorizeMultiProbes(). |
| // |
| // For MultiProbe Scans |
| // |
| CostScalar duplicateSuccProbes_; |
| |
| CostScalar tuplesProcessed_; |
| |
| private: |
| |
| // The associated FileScan node |
| const FileScan& fileScan_; |
| |
| // The cardinality of the synthesized statistics for the scan: |
| const CostScalar resultSetCardinality_; |
| |
| // Estimated number of Dp2 rows accessed: |
| CostScalar estRowsAccessed_; |
| |
| // The context in which the scan is being optimized: |
| const Context &context_; |
| |
| // In addition to the scan node's characteristic inputs there |
| // may be other inputs that we want to consider for key predicates, |
| // such as partition input variables. This is why we have a separate |
| // data member for the external inputs. |
| ValueIdSet externalInputs_; |
| |
| // Estimate of number of blocks that DP2 needs to read |
| // per access. This value is passed to DP2 by the executor, |
| // DP2 uses it to decide whether it will do read ahead |
| // or not. |
| // Its value is -1 if uninitialized |
| Lng32 numberOfBlocksToReadPerAccess_; |
| |
| // Indicates if the probes are completely in order, or partially |
| // in order, but cache is big enough so that we get the same |
| // benefit as if the probes were completely in order. |
| NABoolean inOrderProbes_; |
| |
| // Indicates if the probes are completely in order accross partitions, |
| // and so the access to multiple partitions of the inner table will |
| // be serialized. |
| NABoolean probesForceSynchronousAccess_; |
| |
| }; // class ScanOptimizer |
| |
| |
| // ----------------------------------------------------------------------- |
| // The class fileScanOptimizer performs several actions: |
| // 1.- Decides the access method for the scan |
| // 2.- Computes the cost for the access method |
| // 3.- Builds a key for the access method and attaches |
| // that key to the scan. |
| // ----------------------------------------------------------------------- |
| class MDAMCostWA; |
| class MDAMOptimalDisjunctPrefixWA; |
| class NewMDAMCostWA; |
| class NewMDAMOptimalDisjunctPrefixWA; |
| class FileScanOptimizer : public ScanOptimizer |
| { |
| friend class MDAMCostWA; |
| friend class MDAMOptimalDisjunctPrefixWA; |
| friend class NewMDAMCostWA; |
| friend class NewMDAMOptimalDisjunctPrefixWA; |
| friend class MdamTrace; |
| public: |
| |
| FileScanOptimizer(const FileScan& associatedFileScan |
| ,const CostScalar& resultSetCardinality |
| ,const Context& myContext |
| ,const ValueIdSet &externalInputs) : |
| ScanOptimizer(associatedFileScan |
| ,resultSetCardinality |
| ,myContext |
| ,externalInputs) |
| ,rawInnerHistograms_(*(associatedFileScan.getIndexDesc()), |
| associatedFileScan.getIndexDesc()->getIndexKey().entries()) |
| { |
| } |
| |
| virtual ~FileScanOptimizer(); |
| |
| // ----------------------------------------------------------------------- |
| // Accessors: |
| // ----------------------------------------------------------------------- |
| |
| const IndexDescHistograms& getRawInnerHistograms() const |
| { return rawInnerHistograms_; } |
| |
| // ----------------------------------------------------------------------- |
| // Mutators: |
| // ----------------------------------------------------------------------- |
| |
| // ----------------------------------------------------------------------- |
| // optimize performs several actions: |
| // 1.- Picks the best access method (single subset or MDAM) |
| // 2.- Creates the appropiate key |
| // 3.- Returns the appropiate key and makes sure only one key |
| // gets generated (Single subset key XOR MdamKey) |
| // 4.- Computes and returns the cost of the chosen access method. |
| // ----------------------------------------------------------------------- |
| virtual Cost * optimize(SearchKey*& searchKeyPtr // out |
| ,MdamKey*& mdamKeyPtr // out |
| ); |
| |
| private: |
| |
| // Pass the join histograms when available |
| void computeNumberOfBlocksToReadPerAccess(const Cost& scanCost, |
| NABoolean &isMDAM, |
| CostScalar numKBytes); |
| |
| void |
| computeIOForFullCacheBenefit( |
| CostScalar& seeks /* out */ |
| ,CostScalar& sequential_io /* out */ |
| ,const CostScalar& beginBlocksLowerBound |
| ,const CostScalar& totalBlocksLowerBound |
| ,const CostScalar& indexBlocks) const; |
| |
| void |
| computeSeekForDp2ReadAheadAndProbeOrder( |
| CostScalar& seekComputedWithDp2ReadAhead, |
| const CostScalar& finalRows, |
| const CostScalar& uniqueProbes, |
| const CostScalar& beginBlocksLowerBound, |
| const CostScalar& totalBlocksLowerBound, |
| const CostScalar& innerBlocksUpperBound, |
| const CostScalar& dp2CacheSize, |
| const NABoolean inOrderProbes) const; |
| |
| void |
| computeIOForRandomCase( |
| CostScalar& seeks /* out */ |
| ,CostScalar& sequential_io /* out */ |
| ,const CostScalar& blksPerSuccProbe |
| ,const CostScalar& beginBlocksLowerBound |
| ,const CostScalar& totalBlocksLowerBound |
| ,const CostScalar& successfulProbes |
| ,const CostScalar& failedProbes |
| ,const CostScalar& probes) const; |
| |
| void computeIOForFullTableScan( |
| CostScalar& dataRows /* out */ |
| ,CostScalar& seeks /* out */ |
| ,CostScalar& sequential_io /* out */ |
| ,const CostScalar& probes) const; |
| |
| void computeCostVectors( |
| SimpleCostVector& firstRow /* out */ |
| ,SimpleCostVector& lastRow /* out */ |
| ,CostScalar& seqKBytesPerScan /* out */ |
| ,const CostScalar& totalRows |
| ,const CostScalar& subsetRequests |
| ,const CostScalar& successfulSubsetRequests |
| ,const CostScalar& seeks |
| ,const CostScalar& sequential_io |
| ,const ValueIdSet& keyPredicates |
| ,const ValueIdSet& exePreds |
| ,const CostScalar& incomingProbes // probes incoming to the operator |
| ) const; |
| |
| void computeCostVectorsForMultipleSubset( |
| SimpleCostVector& firstRow /* out */ |
| ,SimpleCostVector& lastRow /* out */ |
| ,CostScalar& seqKBytesPerScan /* out */ |
| ,const CostScalar& totalRows |
| ,const CostScalar& subsetRequests |
| ,const CostScalar& successfulSubsetRequests |
| ,const CostScalar& seeks |
| ,const CostScalar& sequential_io |
| ,const ValueIdSet& keyPredicates |
| ,const ValueIdSet& exePreds |
| ,const CostScalar& incomingProbes // probes incoming to the operator |
| ,const CostScalar& mdamNetPredCnt // the sum of preds in all disjuncts |
| ) const; |
| |
| |
| // ----------------------------------------------------------------------- |
| // Computes the cost for non-MDAM case. It will return NULL if, |
| // while computing the cost, the predicate expression |
| // for any key column contains a CONFLICT and the |
| // breakOnConflictFlag is TRUE. This last thing |
| // is needed because MDAM knows how to resolve a conflict, |
| // however, it MUST be set to false when non-mdam is being |
| // forced. |
| // ----------------------------------------------------------------------- |
| Cost * computeCostForSingleSubset(SearchKey& searchKey /* in/out */ |
| ,const NABoolean& breakOnConflict |
| ,CostScalar & numKBytes |
| ); |
| |
| // ----------------------------------------------------------------------- |
| // Computes the cost for MDAM. It will return NULL if, |
| // while computing the cost, the cost exceeds or equals the |
| // cost bound provided as input. |
| // ----------------------------------------------------------------------- |
| Cost * computeCostForMultipleSubset(MdamKey* mdamKeyPtr /* in/out */ |
| ,const Cost* costBoundPtr |
| ,NABoolean mdamForced |
| ,CostScalar & numKBytes |
| ,NABoolean checkExePreds |
| ,NABoolean mdanTypeIsCommon |
| ,MdamKey*& sharedMdamKeyPtr |
| ); |
| Cost * oldComputeCostForMultipleSubset(MdamKey* mdamKeyPtr /* in/out */ |
| ,const Cost* costBoundPtr |
| ,NABoolean mdamForced |
| ,CostScalar & numKBytes |
| ,NABoolean checkExePreds |
| ,NABoolean mdanTypeIsCommon |
| ,MdamKey*& sharedMdamKeyPtr |
| ); |
| |
| Cost* newComputeCostForMultipleSubset |
| ( MdamKey* mdamKeyPtr, |
| const Cost * costBoundPtr, |
| NABoolean mdamForced, |
| CostScalar & numKBytes, |
| ValueIdSet exePreds, |
| NABoolean checkExePreds, |
| NABoolean mdamTypeIsCommon, |
| MdamKey *&sharedMdamKeyPtr ); |
| |
| Cost* scmComputeCostForSingleSubset(); |
| |
| Cost* scmRewrittenComputeCostForMultipleSubset |
| ( MdamKey* mdamKeyPtr, |
| const Cost * costBoundPtr, |
| NABoolean mdamForced, |
| CostScalar & numKBytes, |
| ValueIdSet exePreds, |
| NABoolean checkExePreds, |
| MdamKey *&sharedMdamKeyPtr ); |
| |
| Cost* scmComputeCostForMultipleSubset |
| ( MdamKey* mdamKeyPtr, |
| const Cost * costBoundPtr, |
| NABoolean mdamForced, |
| CostScalar & numKBytes, |
| ValueIdSet exePreds, |
| NABoolean checkExePreds, |
| NABoolean mdamTypeIsCommon, |
| MdamKey *&sharedMdamKeyPtr ); |
| |
| Cost* scmComputeMDAMCostForHbase |
| ( CostScalar& totalRows |
| ,CostScalar& seeks |
| ,CostScalar& sequential_io |
| ,CostScalar& incomingProbes ); |
| |
| #ifndef NDEBUG |
| void runMdamTests |
| ( const MdamKey* mdamKeyPtr, |
| const Cost * costBoundPtr, |
| NABoolean mdamForced, |
| ValueIdSet exePreds, |
| NABoolean checkExePreds, |
| NABoolean mdamTypeIsCommon |
| ); |
| #endif |
| |
| NABoolean isMultipleProbes() const; |
| const ScanForceWildCard* findScanForceWildCard() const; |
| CollIndex computeLastKeyColumnOfDisjunct(const ColumnOrderList & keyPredsByCol); |
| const CostScalar getIncomingProbes() const; |
| // return true if has resuable shared basic cost for this mdam |
| NABoolean getSharedCost(FileScanBasicCost * &fileScanBasicCostPtr /*out, never NULL*/ |
| ,NABoolean & hasLostBefore /*out*/ |
| ,SimpleCostVector * &disjunctsFRPtr /*out never NULL*/ |
| ,SimpleCostVector * &disjunctsLRPtr /*out never NULL*/ |
| ,CostScalar & numKBytes /*out*/ |
| ,MdamKey* & sharedMdamKeyPtr /*out*/ |
| ,NABoolean mdamTypeIsCommon /*in*/); |
| // ----------------------------------------------------------------------- |
| // Helper methods: |
| // ----------------------------------------------------------------------- |
| // ----------------------------------------------------------------------- |
| // Returns TRUE if the cost resulting from firstRow and lastRow |
| // exceeds or equals the given cost bound |
| // ----------------------------------------------------------------------- |
| NABoolean exceedsBound(const Cost *costBoundPtr |
| ,const SimpleCostVector& firstRow |
| ,const SimpleCostVector& lastRow |
| ) const; |
| |
| |
| NABoolean hasTooManyDisjuncts() const; |
| |
| NABoolean isMDAMFeasibleForHBase(const IndexDesc* idesc, ValueIdSet& preds); |
| |
| private: |
| |
| IndexDescHistograms rawInnerHistograms_; |
| |
| }; // class FileScanOptimizer |
| |
| // ----------------------------------------------------------------------- |
| // Class FileScanBasicCost |
| // 1.- The object of this class contains 3 pairs (for the first and |
| // last row goals) of simple cost vectors depending on the costing |
| // type: single subset, MDAM common, MDAM disjuncts. The purpose |
| // of this class is to reuse these simple cost vectors whenever |
| // possible like synchronous and asynchronous access to the same |
| // table. Every index descriptor will contain the list of such |
| // objects. If context of current FileScanOptimizer is similar |
| // to one in the list then corresponding firstRow and lastRow cost |
| // vectors will be used to compute CostObject. If not, a new object |
| // will be added to the list and its simple cost vectors will be |
| // computed using current context logical and physical properties. |
| // 2.- Provides mutator type acces to simple cost vectors and |
| // noExePreds flag that also has to be reused. |
| // 3.- hasSameBasicProperties() function checks if context that created |
| // this object for this IndexDesc and the context passed as a |
| // parameter have the same basic properties therefore - the same |
| // simple cost vectors |
| // ----------------------------------------------------------------------- |
| |
| class FileScanBasicCost : public NABasicObject |
| { |
| public: |
| |
| FileScanBasicCost(const Context * currentContext): |
| basicCostContext_(currentContext), |
| basicFRCostSingleSubset_(NULL), |
| basicLRCostSingleSubset_(NULL), |
| basicFRCostMdamCommon_(NULL), |
| basicLRCostMdamCommon_(NULL), |
| basicFRCostMdamDisjuncts_(NULL), |
| basicLRCostMdamDisjuncts_(NULL), |
| mdamCommonKeyPtr_(NULL), |
| mdamDisjunctsKeyPtr_(NULL), |
| mdamCommonLost_(FALSE), |
| mdamDisjunctsLost_(FALSE) |
| |
| {CMPASSERT(basicCostContext_ != NULL);} |
| |
| FileScanBasicCost(const FileScanBasicCost & other): |
| basicCostContext_(other.basicCostContext_), |
| basicFRCostSingleSubset_(other.basicFRCostSingleSubset_), |
| basicLRCostSingleSubset_(other.basicLRCostSingleSubset_), |
| basicFRCostMdamCommon_(other.basicFRCostMdamCommon_), |
| basicLRCostMdamCommon_(other.basicLRCostMdamCommon_), |
| basicFRCostMdamDisjuncts_(other.basicFRCostMdamDisjuncts_), |
| basicLRCostMdamDisjuncts_(other.basicLRCostMdamDisjuncts_), |
| mdamCommonKeyPtr_(other.getMdamKeyPtr(TRUE)), |
| mdamDisjunctsKeyPtr_(other.getMdamKeyPtr(FALSE)), |
| mdamCommonLost_(other.mdamCommonLost_), |
| mdamDisjunctsLost_(other.mdamDisjunctsLost_) |
| |
| {CMPASSERT(basicCostContext_ != NULL);} |
| |
| ~FileScanBasicCost() {}; |
| |
| SimpleCostVector & |
| getFRBasicCostSingleSubset() {return basicFRCostSingleSubset_;} |
| |
| SimpleCostVector & |
| getLRBasicCostSingleSubset() {return basicLRCostSingleSubset_;} |
| |
| SimpleCostVector & |
| getFRBasicCostMdamCommon() {return basicFRCostMdamCommon_;} |
| |
| SimpleCostVector & |
| getLRBasicCostMdamCommon() {return basicLRCostMdamCommon_;} |
| |
| SimpleCostVector & |
| getFRBasicCostMdamDisjuncts() {return basicFRCostMdamDisjuncts_;} |
| |
| SimpleCostVector & |
| getLRBasicCostMdamDisjuncts() {return basicLRCostMdamDisjuncts_;} |
| |
| MdamKey * getMdamKeyPtr(NABoolean mdamType) const |
| {return (mdamType ? mdamCommonKeyPtr_ : mdamDisjunctsKeyPtr_);} |
| |
| void setMdamKeyPtr(MdamKey *mdamKeyPtr, NABoolean mdamType) |
| { mdamType ? mdamCommonKeyPtr_ = mdamKeyPtr |
| : mdamDisjunctsKeyPtr_ = mdamKeyPtr;} |
| |
| NABoolean hasSameBasicProperties(const Context & currentContext) const; |
| |
| // To set the kbytes for singlesubset, common & disjunt |
| |
| void setSingleSubsetNumKBytes(CostScalar numKBytes) |
| { singleSubsetNumKBytes = numKBytes; } |
| |
| void setEstRowsAccessed(CostScalar estRows) |
| { estRowsAccessed_ = estRows; } |
| |
| void setMdamCommonNumKBytes(CostScalar numKBytes) |
| { mdamCommonNumKBytes = numKBytes; } |
| |
| void setMdamDisjunctsNumKBytes(CostScalar numKBytes) |
| { mdamDisjunctsNumKBytes = numKBytes; } |
| |
| // To get the kbytes from singlesubset, common * disjunt |
| |
| CostScalar getSingleSubsetNumKBytes() |
| { return singleSubsetNumKBytes; } |
| |
| CostScalar getEstRowsAccessed() |
| { return estRowsAccessed_; } |
| |
| CostScalar getMdamCommonNumKBytes() |
| { return mdamCommonNumKBytes; } |
| |
| CostScalar getMdamDisjunctsNumKBytes() |
| { return mdamDisjunctsNumKBytes; } |
| |
| NABoolean hasMdamCommonLost() const; |
| NABoolean hasMdamDisjunctsLost() const; |
| void setMdamCommonLost(NABoolean); |
| void setMdamDisjunctsLost(NABoolean); |
| |
| private: |
| |
| FileScanBasicCost(): |
| basicCostContext_(NULL), |
| basicFRCostSingleSubset_(NULL), |
| basicLRCostSingleSubset_(NULL), |
| basicFRCostMdamCommon_(NULL), |
| basicLRCostMdamCommon_(NULL), |
| basicFRCostMdamDisjuncts_(NULL), |
| basicLRCostMdamDisjuncts_(NULL), |
| mdamCommonKeyPtr_(NULL), |
| mdamDisjunctsKeyPtr_(NULL), |
| singleSubsetNumKBytes(csZero), |
| estRowsAccessed_(csZero), |
| mdamCommonNumKBytes(csZero), |
| mdamDisjunctsNumKBytes(csZero), |
| mdamCommonLost_(FALSE), |
| mdamDisjunctsLost_(FALSE) |
| |
| {CMPASSERT(basicCostContext_ != NULL);} |
| |
| const Context * basicCostContext_; |
| SimpleCostVector basicFRCostSingleSubset_; |
| SimpleCostVector basicLRCostSingleSubset_; |
| SimpleCostVector basicFRCostMdamCommon_; |
| SimpleCostVector basicLRCostMdamCommon_; |
| SimpleCostVector basicFRCostMdamDisjuncts_; |
| SimpleCostVector basicLRCostMdamDisjuncts_; |
| MdamKey * mdamCommonKeyPtr_; |
| MdamKey * mdamDisjunctsKeyPtr_; |
| NABoolean mdamCommonLost_; |
| NABoolean mdamDisjunctsLost_; |
| |
| // Three costScalars had to been defined to preserve |
| // Kbytes in order to calculate the blocksperaccess as |
| // the simple Cost Vector has been reduced to have only |
| // total time for IO. |
| |
| CostScalar singleSubsetNumKBytes; |
| CostScalar estRowsAccessed_; |
| CostScalar mdamCommonNumKBytes; |
| CostScalar mdamDisjunctsNumKBytes; |
| }; |
| |
| |
| // ----------------------------------------------------------------------- |
| // List of basic cost objects. Every index descriptor will have this |
| // list to provide acces to individual basic cost objects with th help |
| // of standard collection tools like insert(), entries() and index |
| // access( overloaded []) |
| // ----------------------------------------------------------------------- |
| |
| class FileScanCostList : public LIST (FileScanBasicCost *) |
| { |
| public: |
| FileScanCostList(NAMemory *h) : |
| LIST (FileScanBasicCost *) (h) {}; |
| }; |
| |
| |
| #endif |
| // eof |
| |