| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| /* -*-C++-*- |
| ****************************************************************************** |
| * |
| * File: SearchKey.C |
| * Description: Relational expressions (both physical and logical operators) |
| * Created: 07/31/95 |
| * Language: C++ |
| * |
| * |
| * |
| * |
| ****************************************************************************** |
| */ |
| |
| // ----------------------------------------------------------------------- |
| #include "NAAssert.h" |
| #include "ExprNode.h" |
| #include "OperTypeEnum.h" |
| #include "Collections.h" |
| #include "NAType.h" |
| #include "AllItemExpr.h" |
| #include "SearchKey.h" |
| #include "PartFunc.h" |
| #include "NAColumn.h" |
| #include "NATable.h" |
| #include "NumericType.h" |
| #include "HDFSHook.h" |
| #include "OptRange.h" |
| |
| // *********************************************************************** |
| // $$$ SearchKey |
| // *********************************************************************** |
| |
| void SearchKey::init(const ValueIdList & keyColumns, |
| const ValueIdList & orderOfKeyValues, |
| const ValueIdSet & externalInputs, |
| const NABoolean forwardSearch, |
| ValueIdSet & setOfPredicates) |
| { |
| |
| // --------------------------------------------------------------------- |
| // Access the key columns of the given index. |
| // --------------------------------------------------------------------- |
| ValueIdSet setOfKeyPredicates; // accumulator for key predicates |
| ValueIdSet setOfNonKeyPredicates; // accumulator for non key predicates |
| // that are generated by the key builder |
| NABoolean keyPredicatesUnique; // boolean indicating whether key preds |
| // specify a unique row |
| |
| // --------------------------------------------------------------------- |
| // Allocate the search key work area for processing keyCount keys.. |
| // --------------------------------------------------------------------- |
| SearchKeyWorkSpace * workSpace = new(CmpCommon::statementHeap()) |
| SearchKeyWorkSpace |
| (keyColumns, |
| orderOfKeyValues, |
| forwardSearch, |
| *this); |
| |
| |
| // Compute the key predicates: |
| getKeyPredicates(setOfKeyPredicates); |
| |
| //setOfKeyPredicates could be empty |
| |
| if (setOfPredicates.entries() > 0 ) |
| { |
| ValueIdList selectionPredList(setOfPredicates); |
| if (CmpCommon::getDefault(RANGESPEC_TRANSFORMATION) == DF_ON && |
| getIndexDesc()->getPrimaryTableDesc()->getNATable()->isHbaseTable() == FALSE) |
| { |
| ItemExpr *inputItemExprTree = selectionPredList.rebuildExprTree(ITM_AND,FALSE,FALSE); |
| ItemExpr* resultOld = revertBackToOldTree(CmpCommon::statementHeap(), |
| inputItemExprTree); |
| setOfPredicates.clear(); |
| resultOld->convertToValueIdSet(setOfPredicates, NULL, ITM_AND, FALSE); |
| doNotReplaceAnItemExpressionForLikePredicates(resultOld,setOfPredicates,resultOld); |
| // Due to severity of impact in various areas like cardinality estimates etc. |
| // this method is not used. |
| //ValueIdSet resultSet; |
| //revertBackToOldTreeUsingValueIdSet(setOfPredicates, resultSet); |
| //setOfPredicates.clear(); |
| //ItemExpr* resultOld = resultSet.rebuildExprTree(ITM_AND,FALSE,FALSE); |
| //setOfPredicates += resultSet; |
| //doNotReplaceAnItemExpressionForLikePredicates(resultOld,setOfPredicates,resultOld); |
| //doNotReplaceAnItemExpression(resultOld,setOfPredicates,resultOld); |
| } |
| } |
| |
| |
| // --------------------------------------------------------------------- |
| // LOOP1 : for each index column ... |
| // |
| // Choose the predicates that are eligible for performing a search in |
| // the index. Compute the bounds (range of values) to be searched |
| // using each search predicate. |
| // --------------------------------------------------------------------- |
| workSpace->analyzeSearchPredicates(setOfKeyPredicates,externalInputs); |
| |
| |
| // --------------------------------------------------------------------- |
| // LOOP2 : for each eligible search predicate |
| // |
| // Finalize the upper and lower bounds for the search. |
| // If the predicate is FALSE, then we do not want to do a scan |
| // so in taht case, swap the begin and the end keys |
| |
| // --------------------------------------------------------------------- |
| NABoolean isPredFalse = FALSE; |
| for (ValueId predId = setOfPredicates.init(); |
| setOfPredicates.next(predId); |
| setOfPredicates.advance(predId)) |
| { |
| OperatorTypeEnum OpType = predId.getItemExpr()->getOperatorType(); |
| |
| if( OpType == ITM_RETURN_FALSE ) |
| { |
| isPredFalse = TRUE; |
| break; |
| } |
| else if ( OpType == ITM_CONSTANT ) |
| { |
| NABoolean negate; |
| ConstValue *cv = predId.getItemExpr()->castToConstValue(negate); |
| if( cv && cv->getType()->getTypeQualifier() == NA_BOOLEAN_TYPE ) |
| { |
| |
| //Since the scan will not result in any fruitful |
| //values we should not scan the entire subset which |
| // is a massive waste of resources on large tables. |
| // So stop such scans. One way is to switch the scan keys. |
| // We can achieve the same effect by switching the scan |
| // direction. |
| Int32 val = *((Int32*)cv->getConstValue()); |
| if( val == 0 ) |
| { |
| isPredFalse = TRUE; |
| break; |
| } |
| |
| } |
| } |
| |
| } |
| |
| if (isPredFalse) |
| workSpace->computeBeginKeyAndEndKeyValues(keyColumns, |
| externalInputs, |
| endKeyValues_, |
| beginKeyValues_, |
| keyPredicates_, |
| setOfNonKeyPredicates, |
| keyPredicatesUnique, |
| endKeyIsExclusive_, |
| beginKeyIsExclusive_, |
| endKeyExclusionExpr_, |
| beginKeyExclusionExpr_, |
| allBeginKeysMissing_, |
| allEndKeysMissing_, |
| allChosenPredsAreEqualPreds_ |
| ); |
| |
| |
| else |
| workSpace->computeBeginKeyAndEndKeyValues(keyColumns, |
| externalInputs, |
| beginKeyValues_, |
| endKeyValues_, |
| keyPredicates_, |
| setOfNonKeyPredicates, |
| keyPredicatesUnique, |
| beginKeyIsExclusive_, |
| endKeyIsExclusive_, |
| beginKeyExclusionExpr_, |
| endKeyExclusionExpr_, |
| allBeginKeysMissing_, |
| allEndKeysMissing_, |
| allChosenPredsAreEqualPreds_ |
| ); |
| |
| |
| // Find any full key predicate in setOfPredicates that refers |
| // to a key column and save the result in fullKeyPredicates_. |
| computeFullKeyPredicates(setOfPredicates); |
| |
| |
| // We do not want to remove the predicates which have two constants |
| // in their VEG. These should be evaluated like any other predicate. |
| |
| ValueIdSet keyPredsToBeEvaluated; |
| keyPredicates_.getVEGesWithMultipleConsts(keyPredsToBeEvaluated); |
| // --------------------------------------------------------------------- |
| // Subtract the key predicates from the given setOfPredicates. |
| // Add any extra selection expressions that are generated by the key |
| // builder, including the key predicates that should be evaluated |
| // separately |
| // |
| // The exception is for key predicates referencing numeric |
| // data types implemented with Big number. Because of limitation in |
| // the implementation of Big number with positive scale, truncation can |
| // happen to a key value when the scale is positive. So T.KeyCol = 12.3 |
| // is actually evaluated as T.keyCol = 12. To avoid it, we need to evaluate |
| // T.KeyCol = 12.3 as execute predicate. Note that when the key is a |
| // composite key, the key predicate is evalated as the executor predicate |
| // anyway. So here we just do the exclusion for composite key case. See |
| // SQ2535. |
| // --------------------------------------------------------------------- |
| if ( keyPredicates_.referencesBignumNumericDataType() == FALSE || |
| keyPredicates_.entries() > 1 ) |
| { |
| setOfPredicates -= keyPredicates_; |
| } |
| |
| setOfPredicates += keyPredsToBeEvaluated; |
| setOfPredicates += setOfNonKeyPredicates; |
| |
| // ----------------------------------------------------------------------- |
| // Now set the SearchKey's executor predicates: |
| // ----------------------------------------------------------------------- |
| setExecutorPredicates(setOfPredicates); |
| |
| isUnique_ = keyPredicatesUnique; |
| |
| // --------------------------------------------------------------------- |
| // Figure out the leading key columns. Store the length in coveredLeadingKeys_ |
| // --------------------------------------------------------------------- |
| computeCoveredLeadingKeys(); |
| |
| // --------------------------------------------------------------------- |
| // Free the work space. |
| // --------------------------------------------------------------------- |
| delete workSpace; |
| } |
| |
| // ----------------------------------------------------------------------- |
| // SearchKey() |
| // |
| // A search key is a pair of expressions that together define a range |
| // of values. Its purpose is to localize the search for specific |
| // values within a b-tree index. |
| // |
| // This method is used for building a search key for an operator that |
| // will be used for accessing data from the given access path. It |
| // receives a set of predicates as an input. It analyzes them and |
| // uses eligible ones for forming the search key. It removes the |
| // ValueIds of those predicates that it can use for forming the |
| // search key form the given set. (This constructor is therefore a |
| // function with the side-effect that the setOfPredicates can be |
| // modified.) |
| // |
| // It returns a SearchKey regardless of whether eligible key |
| // predicates are found. In such a case, the range to be search |
| // is logically equal to the interval (-infinity, +infinity). |
| // |
| // Parameters: |
| // |
| // const ValueIdList & keyColumns |
| // IN : A read-only reference to the list of key columns |
| // |
| // const ValueIdList & orderOfKeyValues |
| // IN : A list of expressions that define the ASC/DESC |
| // order of key values. It can be empty. |
| // |
| // const ValueIdSet & externalInputs |
| // IN : A read-only reference to the external inputs that are |
| // available to the operator that will be used for accessing |
| // data from the access path. |
| // |
| // const NABoolean forwardSearch |
| // IN : TRUE => The search in the index will use a top-down |
| // left-to-right tree walk. |
| // FALSE => right-to-left tree walk |
| // |
| // ValueIdSet & setOfPredicates |
| // IN : The set of predicates that are to be analyzed for |
| // determining whether some of them can be used for |
| // forming the search key. |
| // OUT: The set of predicates that are not used for forming |
| // the search key, i.e., key predicates = IN - OUT. |
| // |
| // const ValueIdSet & nonKeyColumnSet |
| // IN : The set of nonKeyColumns for this table |
| // |
| // const IndexDesc * indexDesc |
| // IN : The pointer to the indexDesc |
| // |
| // Return value |
| // OUT: A SearchKey |
| // |
| // ----------------------------------------------------------------------- |
| SearchKey::SearchKey(const ValueIdList & keyColumns, |
| const ValueIdList & orderOfKeyValues, |
| const ValueIdSet & externalInputs, |
| const NABoolean forwardSearch, |
| ValueIdSet & setOfPredicates, |
| const ValueIdSet& nonKeyColumnSet, |
| const IndexDesc *indexDesc, |
| const IndexDesc *UDindexDesc) |
| :ScanKey(keyColumns, |
| externalInputs, |
| *(new (CmpCommon::statementHeap()) MaterialDisjuncts( |
| setOfPredicates)), |
| nonKeyColumnSet, indexDesc, UDindexDesc), |
| beginKeyValues_(keyColumns.entries()), |
| endKeyValues_(keyColumns.entries()), |
| valuesArePartitionRange_(FALSE), |
| _countTimesBoundaryValReq(0) |
| { |
| |
| init(keyColumns, |
| orderOfKeyValues, |
| externalInputs, |
| forwardSearch, |
| setOfPredicates); |
| |
| } // SearchKey::SearchKey() |
| |
| SearchKey::SearchKey(const ValueIdList & keyColumns, |
| const ValueIdList & orderOfKeyValues, |
| const ValueIdSet & externalInputs, |
| const NABoolean forwardSearch, |
| ValueIdSet & setOfPredicates, |
| const Disjuncts& associatedDisjuncts, |
| const ValueIdSet& nonKeyColumnSet, |
| const IndexDesc *indexDesc, |
| const IndexDesc *UDindexDesc) |
| :ScanKey(keyColumns, |
| externalInputs, |
| associatedDisjuncts, |
| nonKeyColumnSet, |
| indexDesc, |
| UDindexDesc |
| ), |
| beginKeyValues_(keyColumns.entries()), |
| endKeyValues_(keyColumns.entries()), |
| valuesArePartitionRange_(FALSE), |
| _countTimesBoundaryValReq(0) |
| { |
| init(keyColumns, |
| orderOfKeyValues, |
| externalInputs, |
| forwardSearch, |
| setOfPredicates); |
| |
| } // SearchKey::SearchKey() |
| |
| // ----------------------------------------------------------------------- |
| // Constructor to set up partition search keys for a range partitioned table |
| // ----------------------------------------------------------------------- |
| SearchKey::SearchKey(const ValueIdList & keyColumns, |
| const ValueIdList & orderOfKeyValues, |
| const ValueIdSet & externalInputs, |
| const NABoolean forwardSearch, |
| ValueIdSet & setOfPredicates, |
| const RangePartitioningFunction *rpf, |
| const ValueIdSet& nonKeyColumnSet, |
| const IndexDesc *indexDesc, |
| const IndexDesc *UDindexDesc) : |
| ScanKey(keyColumns, |
| externalInputs, |
| *(new (CmpCommon::statementHeap()) MaterialDisjuncts( |
| setOfPredicates)), |
| nonKeyColumnSet, indexDesc, UDindexDesc), |
| valuesArePartitionRange_(FALSE), |
| _countTimesBoundaryValReq(0) |
| { |
| const ValueIdList &pivs = rpf->getPartitionInputValuesLayout(); |
| CollIndex commonCols = 0; |
| NABoolean equivCols = TRUE; |
| NABoolean sameScan = FALSE; |
| |
| // add the partitioning key predicates to the potential key |
| // predicates (if not already done by caller) |
| setOfPredicates += rpf->getPartitioningKeyPredicates(); |
| |
| // loop over the columns of the index and the range partitioning |
| // function as long as those columns correspond to each other |
| // and are in the same (or inverse) order |
| while (equivCols AND |
| commonCols < rpf->getKeyColumnList().entries() AND |
| commonCols < keyColumns.entries()) |
| { |
| ItemExpr *kc = keyColumns[commonCols].getItemExpr(); |
| ItemExpr *rc = rpf->getKeyColumnList()[commonCols].getItemExpr(); |
| |
| if (kc != rc AND kc->getOperatorType() == ITM_INDEXCOLUMN) |
| kc = ((IndexColumn *) kc)->getDefinition().getItemExpr(); |
| if (kc != rc) |
| { |
| if (rc->getOperatorType() == ITM_VEG_REFERENCE) |
| { |
| if (NOT (((VEGReference *) rc)->getVEG()-> |
| getAllValues().contains(kc->getValueId()))) |
| equivCols = FALSE; |
| } |
| else |
| equivCols = FALSE; |
| } |
| |
| // determine the orderings of the key columns, whether they are |
| // the same, inverse, or incompatible. |
| if (equivCols) |
| { |
| NABoolean colIsInv = |
| (orderOfKeyValues[commonCols].getItemExpr()-> |
| getOperatorType() == ITM_INVERSE); |
| NABoolean pfIsInv = |
| (rpf->getOrderOfKeyValues()[commonCols].getItemExpr()-> |
| getOperatorType() == ITM_INVERSE); |
| if (commonCols == 0) |
| sameScan = (colIsInv == pfIsInv); |
| else |
| if (sameScan != (colIsInv == pfIsInv)) |
| equivCols = FALSE; |
| } |
| |
| // try next column if we are still in the game |
| if (equivCols) |
| commonCols++; |
| } |
| |
| // If at least one column matched, then use the PIVs as the begin/end |
| // key values for the columns that did match. For any remaining |
| // key columns that didn't match, set the begin/end key values |
| // to the absolute MIN or MAX. |
| if (commonCols >= 1) |
| { |
| CollIndex numPartKeyCols = rpf->getKeyColumnList().entries(); |
| |
| for (CollIndex i = 0; i < keyColumns.entries(); i++) |
| { |
| if (i < commonCols) |
| { |
| // take the begin/end key values from the range partitioning |
| // input variables |
| |
| // all NABooleans in this context are set to results of C++ |
| // comparisons, so that we can use the "==" operator on them |
| if (sameScan == (forwardSearch == TRUE)) |
| { |
| // start with the lo values, end with hi values |
| beginKeyValues_.insert(pivs[i]); |
| endKeyValues_.insert(pivs[i+numPartKeyCols]); |
| } |
| else // inverse scan |
| { |
| // start with hi values, end with lo values |
| beginKeyValues_.insert(pivs[i+numPartKeyCols]); |
| endKeyValues_.insert(pivs[i]); |
| } |
| } |
| else |
| { |
| // This and all subsequent columns didn't match. There were |
| // fewer columns in the range partitioning function. This |
| // could happen, for example, if there were two columns in the |
| // key, but first key values were only specified for the first |
| // column. |
| |
| // NOTE: this can happen because we currently store ALL |
| // clustering key columns as partitioning key in an IndexDesc. |
| // For tables that are partitioned on a prefix of the |
| // clustering key columns, special logic is required. For |
| // ascending values, we want the end key of the |
| // non-partitioning column to be the min value, since that is |
| // the assumed value when we provide a start key in a DDL |
| // statement. For example, if a table is clustered on (a,b) |
| // and we created 3 partitions with start keys (-infinity), |
| // (100), (200), then the end key for reading the first |
| // partition should be (100,-infinity), not (100, |
| // +infinity). Note that in this case the end key is |
| // exclusive. Note also that for the last partition it would |
| // be a mistake to use -infinity, because its end key is |
| // inclusive! Therefore, the end key for this case is |
| // generated as (+infinity, +infinity). Sorry about the very |
| // complex if-then-else statement that does this determination |
| // at execution time. |
| |
| // The PIVs supply the value with a lower key encoding in |
| // index i and the value with the higher key encoding in index |
| // i+numPartKeyCols. However, we need to consider that for |
| // DESC columns the key encoding of the PIV will include an |
| // automatic inversion of all bits. Start with the MIN value |
| // if this is a forward scan for an ascending column or a |
| // backward scan of a descending column, start with MAX in the |
| // opposite cases. Note that this is a different condition |
| // than for PIVs because of said automatic inversion. |
| ValueId minValue = computeMissingKeyValue(keyColumns[i],TRUE); |
| ValueId maxValue = computeMissingKeyValue(keyColumns[i],FALSE); |
| ItemExpr *minMax; |
| |
| if (forwardSearch == (orderOfKeyValues[i].getItemExpr()-> |
| getOperatorType() != ITM_INVERSE)) |
| { |
| // forward scan of ASC col or reverse scan of DESC col |
| beginKeyValues_.insert(minValue); |
| minMax = |
| new (CmpCommon::statementHeap()) Case( |
| NULL, |
| new (CmpCommon::statementHeap()) IfThenElse( |
| new (CmpCommon::statementHeap()) BiRelat( |
| ITM_NOT_EQUAL, |
| pivs[2*numPartKeyCols].getItemExpr(), |
| new (CmpCommon::statementHeap()) SystemLiteral(0)), |
| minValue.getItemExpr(), |
| maxValue.getItemExpr())); |
| minMax->synthTypeAndValueId(); |
| endKeyValues_.insert(minMax->getValueId()); |
| } |
| else |
| { |
| beginKeyValues_.insert(maxValue); |
| minMax = |
| new (CmpCommon::statementHeap()) Case( |
| NULL, |
| new (CmpCommon::statementHeap()) IfThenElse( |
| new (CmpCommon::statementHeap()) BiRelat( |
| ITM_NOT_EQUAL, |
| pivs[2*numPartKeyCols].getItemExpr(), |
| new (CmpCommon::statementHeap()) SystemLiteral(0)), |
| maxValue.getItemExpr(), |
| minValue.getItemExpr())); |
| minMax->synthTypeAndValueId(); |
| endKeyValues_.insert(minMax->getValueId()); |
| } |
| } // end else non-matching columns |
| } // end for all key index desc key columns |
| |
| keyPredicates_ += rpf->getPartitioningKeyPredicates(); |
| isUnique_ = FALSE; |
| beginKeyIsExclusive_ = FALSE; |
| endKeyIsExclusive_ = FALSE; |
| |
| // --------------------------------------------------------------------- |
| // Subtract the key predicates from the given setOfPredicates. |
| // Add any extra selection expressions that are generated by the key |
| // builder, including the key predicates that should be evaluated |
| // separately |
| // --------------------------------------------------------------------- |
| setOfPredicates -= keyPredicates_; |
| |
| ValueIdSet keyPredsToBeEvaluated; |
| keyPredicates_.getVEGesWithMultipleConsts(keyPredsToBeEvaluated); |
| setOfPredicates += keyPredsToBeEvaluated; |
| |
| endKeyExclusionExpr_ = pivs[2*numPartKeyCols]; |
| } |
| else |
| { |
| // couldn't make use of range partitioning function, do it |
| // the usual way |
| init(keyColumns, |
| orderOfKeyValues, |
| externalInputs, |
| forwardSearch, |
| setOfPredicates); |
| } |
| } // SearchKey::SearchKey() |
| |
| // ----------------------------------------------------------------------- |
| // Constructor to set up partition search keys for a hash partitioned table |
| // ----------------------------------------------------------------------- |
| SearchKey::SearchKey(const ValueIdList & keyColumns, |
| const ValueIdList & orderOfKeyValues, |
| const ValueIdSet & externalInputs, |
| ValueIdSet & setOfPredicates, |
| const TableHashPartitioningFunction *hpf, |
| const ValueIdSet& nonKeyColumnSet, |
| const IndexDesc * indexDesc, |
| const IndexDesc * UDindexDesc) : |
| ScanKey(keyColumns, |
| externalInputs, |
| *(new (CmpCommon::statementHeap()) MaterialDisjuncts( |
| setOfPredicates)), |
| nonKeyColumnSet, indexDesc, UDindexDesc), |
| valuesArePartitionRange_(FALSE), |
| _countTimesBoundaryValReq(0) |
| { |
| |
| // loop over the columns of the index and the hash partitioning |
| // function as long as those columns correspond to each other |
| // |
| CollIndex numKeyCols = hpf->getKeyColumnList().entries(); |
| |
| // Make sure they both have the same number of entries. |
| // |
| NABoolean equivCols = (numKeyCols == keyColumns.entries()); |
| |
| CollIndex colNum = 0; |
| while (equivCols AND colNum < numKeyCols) { |
| |
| ItemExpr *keyCol = keyColumns[colNum].getItemExpr(); |
| ItemExpr *hashCol = hpf->getKeyColumnList()[colNum].getItemExpr(); |
| |
| if (keyCol != hashCol AND keyCol->getOperatorType() == ITM_INDEXCOLUMN) |
| keyCol = ((IndexColumn *) keyCol)->getDefinition().getItemExpr(); |
| |
| if (keyCol != hashCol) { |
| if (hashCol->getOperatorType() == ITM_VEG_REFERENCE) { |
| if (NOT (((VEGReference *) hashCol)->getVEG()-> |
| getAllValues().contains(keyCol->getValueId()))) |
| equivCols = FALSE; |
| } else { |
| equivCols = FALSE; |
| } |
| } |
| |
| colNum++; |
| } |
| |
| // if all columns match, then use the PIVs as the begin/end key |
| // values and set the flag 'valuesArePartitionRange' to TRUE. This |
| // indicates that the begin/end values are no longer key values, but |
| // a range of partitions to be accessed. These will be used by |
| // the generator/executor to set up the range of partitions to access. |
| // |
| if (equivCols) { |
| |
| const ValueIdList &pivs = hpf->getPartitionInputValuesLayout(); |
| |
| // Begin/End key values are no longer key values, but begin/end |
| // partition numbers |
| // |
| valuesArePartitionRange_ = TRUE; |
| beginKeyValues_.insert(pivs[0]); |
| endKeyValues_.insert(pivs[1]); |
| |
| // TableHashPartitioningFunction has no key predicates. |
| // |
| isUnique_ = FALSE; |
| beginKeyIsExclusive_ = FALSE; |
| endKeyIsExclusive_ = FALSE; |
| endKeyExclusionExpr_ = NULL_VALUE_ID; |
| |
| } else { |
| // couldn't make use of hash partitioning function, do it |
| // the usual way |
| init(keyColumns, |
| orderOfKeyValues, |
| externalInputs, |
| TRUE, |
| setOfPredicates); |
| } |
| } // SearchKey::SearchKey() |
| |
| // ----------------------------------------------------------------------- |
| // Constructor to set up partition search keys for a RR partitioned table |
| // ----------------------------------------------------------------------- |
| SearchKey::SearchKey(const ValueIdList & keyColumns, |
| const ValueIdList & orderOfKeyValues, |
| const ValueIdSet & externalInputs, |
| ValueIdSet & setOfPredicates, |
| const RoundRobinPartitioningFunction *rrpf, |
| const ValueIdSet& nonKeyColumnSet, |
| const IndexDesc *indexDesc, |
| const IndexDesc *UDindexDesc) : |
| ScanKey(keyColumns, |
| externalInputs, |
| *(new (CmpCommon::statementHeap()) MaterialDisjuncts( |
| setOfPredicates)), |
| nonKeyColumnSet, indexDesc, UDindexDesc), |
| valuesArePartitionRange_(FALSE), |
| _countTimesBoundaryValReq(0) |
| { |
| |
| // loop over the columns of the index and the hash partitioning |
| // function as long as those columns correspond to each other |
| // |
| CollIndex numKeyCols = rrpf->getPartitioningKey().entries(); |
| |
| // Make sure they both have the same number of entries. |
| // |
| NABoolean equivCols = (numKeyCols == keyColumns.entries()); |
| |
| // Make sure they both have the same number of entries. |
| // |
| if(equivCols) { |
| |
| // Round Robin Partitioning has only one key (SYSKEY) |
| // |
| CMPASSERT(numKeyCols == 1); |
| |
| ItemExpr *keyCol = keyColumns[0].getItemExpr(); |
| ValueId rrVal; |
| rrpf->getPartitioningKey().getFirst(rrVal); |
| ItemExpr *rrCol = rrVal.getItemExpr(); |
| |
| if (keyCol != rrCol AND keyCol->getOperatorType() == ITM_INDEXCOLUMN) |
| keyCol = ((IndexColumn *) keyCol)->getDefinition().getItemExpr(); |
| |
| if (keyCol != rrCol) { |
| if (rrCol->getOperatorType() == ITM_VEG_REFERENCE) { |
| if (NOT (((VEGReference *) rrCol)->getVEG()-> |
| getAllValues().contains(keyCol->getValueId()))) |
| equivCols = FALSE; |
| } else { |
| equivCols = FALSE; |
| } |
| } |
| } |
| |
| // if all (the) columns match, then use the PIVs as the begin/end key |
| // values and set the flag 'valuesArePartitionRange' to TRUE. This |
| // indicates that the begin/end values are no longer key values, but |
| // a range of partitions to be accessed. These will be used by |
| // the generator/executor to set up the range of partitions to access. |
| // |
| if (equivCols) { |
| |
| const ValueIdList &pivs = rrpf->getPartitionInputValuesLayout(); |
| |
| // Begin/End key values are no longer key values, but begin/end |
| // partition numbers |
| // |
| valuesArePartitionRange_ = TRUE; |
| beginKeyValues_.insert(pivs[0]); |
| endKeyValues_.insert(pivs[1]); |
| |
| // RRPartitioningFunction has no key predicates. |
| // |
| isUnique_ = FALSE; |
| beginKeyIsExclusive_ = FALSE; |
| endKeyIsExclusive_ = FALSE; |
| endKeyExclusionExpr_ = NULL_VALUE_ID; |
| |
| } else { |
| // couldn't make use of RR partitioning function, do it |
| // the usual way |
| init(keyColumns, |
| orderOfKeyValues, |
| externalInputs, |
| TRUE, |
| setOfPredicates); |
| } |
| } // SearchKey::SearchKey() |
| |
| ItemExpr *SearchKey::getBeginKeyExclusionExpr() const |
| { |
| if (beginKeyExclusionExpr_ == NULL_VALUE_ID) |
| return NULL; |
| return beginKeyExclusionExpr_.getItemExpr(); |
| } |
| |
| ItemExpr *SearchKey::getEndKeyExclusionExpr() const |
| { |
| if (endKeyExclusionExpr_ == NULL_VALUE_ID) |
| return NULL; |
| return endKeyExclusionExpr_.getItemExpr(); |
| } |
| |
| |
| // ----------------------------------------------------------------------- |
| // SearchKey::computeMissingKeyValue() |
| // ----------------------------------------------------------------------- |
| ValueId SearchKey::computeMissingKeyValue(ValueId keyColumn, |
| NABoolean wantMinValue) |
| { |
| const NAType & type = keyColumn.getItemExpr()->getValueId().getType(); |
| NAColumn *column = keyColumn.getItemExpr()->getValueId().getNAColumn(); |
| |
| ItemExpr * minMaxConstant = |
| new (CmpCommon::statementHeap()) |
| SystemLiteral(type.newCopy(CmpCommon::statementHeap()), |
| wantMinValue, |
| type.supportsSQLnull()); |
| |
| minMaxConstant->synthTypeAndValueId(); |
| |
| return minMaxConstant->getValueId(); |
| } // SearchKey::computeMissingKeyValue() |
| |
| // ----------------------------------------------------------------------- |
| // Methods for debugging |
| // ----------------------------------------------------------------------- |
| void SearchKey::print(FILE* ofd, const char* indent, const char* title) const |
| { |
| BUMP_INDENT(indent); |
| fprintf(ofd,"%s %s \n%s",NEW_INDENT,title,NEW_INDENT); |
| getKeyColumns().print(ofd,indent,"Key Columns:"); |
| getBeginKeyValues().print(ofd,indent,"Begin Key Values:"); |
| getEndKeyValues().print(ofd,indent,"End Key Values:"); |
| getKeyPredicates().print(ofd,indent,"Key Predicates:"); |
| } // SearchKey::print() |
| |
| // To be called from the debugger. |
| void SearchKey::display() const |
| { |
| SearchKey::print(); |
| } // SearchKey::display() |
| |
| // *********************************************************************** |
| // $$$ SearchKeyBounds |
| // *********************************************************************** |
| |
| NABoolean SearchKeyBounds::operator == (const SearchKeyBounds & other) const |
| { |
| if ( (getKeyColumnId() == other.getKeyColumnId()) AND |
| (isAscendingOrder() == other.isAscendingOrder()) AND |
| (isForwardSearch() == other.isForwardSearch()) AND |
| (getBeginKeyBoundType() == other.getBeginKeyBoundType()) AND |
| (getBeginKeyPredicate() == other.getBeginKeyPredicate()) AND |
| (getBeginKeyValue() == other.getBeginKeyValue()) AND |
| (getEndKeyBoundType() == other.getEndKeyBoundType()) AND |
| (getEndKeyPredicate() == other.getEndKeyPredicate()) AND |
| (getEndKeyValue() == other.getEndKeyValue()) ) |
| return TRUE; |
| else |
| return FALSE; |
| } // SearchKeyBounds::operator == () |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyBounds::analyzeSearchPredicates() |
| // |
| // Parameters: |
| // |
| // const ValueIdSet & setOfPredicates |
| // IN : The set of predicates that are to be analyzed for |
| // determining whether some of them can be used for |
| // forming the search key. |
| // |
| // const ValueIdSet & externalInputs |
| // IN : A read-only reference to the external inputs that are |
| // available to the operator that will be used for accessing |
| // data from the access path. |
| // |
| // ----------------------------------------------------------------------- |
| void SearchKeyBounds::analyzeSearchPredicates(const ValueIdSet & setOfPredicates, |
| const ValueIdSet & externalInputs) |
| { |
| // --------------------------------------------------------------------- |
| // LOOP1 : for each predicate in the given set of predicates ... |
| // --------------------------------------------------------------------- |
| |
| ValueId columnId; |
| const ItemExpr *colIEPtr=getKeyColumnId().getItemExpr(); |
| switch (colIEPtr->getOperatorType()) |
| { |
| case ITM_BASECOLUMN: |
| { |
| |
| columnId = getKeyColumnId(); |
| } |
| break; |
| |
| case ITM_INDEXCOLUMN: |
| { |
| |
| // ------------------------------------------------- |
| // Get the base column for the index column: |
| // ------------------------------------------------- |
| BaseColumn *bcIEPtrForIndexColumn = |
| (BaseColumn *) ((IndexColumn *) colIEPtr)-> |
| getDefinition().getItemExpr(); |
| |
| columnId = bcIEPtrForIndexColumn->getValueId(); |
| } |
| break; |
| |
| default: |
| // All value id's in SearchKeyBounds *must* be either |
| // an index column or a base table column, |
| // if we reach here it is an internal error: |
| CMPASSERT(FALSE); |
| } // switch on type of input column |
| |
| |
| for (ValueId predId = setOfPredicates.init(); |
| setOfPredicates.next(predId); |
| setOfPredicates.advance(predId) ) |
| { |
| ValueId referencedInput(NULL_VALUE_ID); |
| ValueId intervalExclusionExpr(NULL_VALUE_ID); |
| // ----------------------------------------------------------------- |
| // Check whether the given predicate is eligible for use as a |
| // search predicate on the index. |
| // ----------------------------------------------------------------- |
| |
| // Is this a key pred. for the bound's columns? |
| if (getSearchKey().isAKeyPredicateForColumn(predId |
| ,referencedInput |
| ,intervalExclusionExpr |
| ,columnId)) |
| { |
| // yes! |
| // ------------------------------------------------------------- |
| // The predicate is a key |
| // predicate, for the bound's column thus |
| // use the predicate for setting a lower and/or upper bound on |
| // the search for values on the given key column. |
| // ------------------------------------------------------------- |
| if (referencedInput == NULL_VALUE_ID) |
| { |
| ItemExpr * nullConstant = new(CmpCommon::statementHeap()) |
| SystemLiteral(); |
| nullConstant->synthTypeAndValueId(); |
| referencedInput = nullConstant->getValueId(); |
| } |
| |
| applyABoundForTheSearch(predId, |
| referencedInput, |
| intervalExclusionExpr); |
| } |
| } // loop over predicates |
| |
| } // SearchKeyBounds::analyzeSearchPredicate() |
| |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyBounds::applyABoundForTheSearch() |
| // |
| // Depending on the type of comparison that is performed by the |
| // predicate use the value that is referenced in it and |
| // establish a bound for the search. |
| // ----------------------------------------------------------------------- |
| void SearchKeyBounds::applyABoundForTheSearch(const ValueId & predId, |
| const ValueId & referencedInput, |
| const ValueId & intervalExclusionExpr) |
| { |
| BoundType boundType = getBoundType(predId,referencedInput); |
| NABoolean isExclusive = isBoundExclusive(predId); |
| |
| // get predicate ItemExpr |
| ItemExpr * predItemExpr = predId.getItemExpr(); |
| |
| BiRelat * rangePred = NULL; |
| |
| // set pointer to range predicate |
| if (predItemExpr->isARangePredicate()) |
| { |
| rangePred = (BiRelat *) predItemExpr; |
| } |
| |
| // --------------------------------------------------------------------- |
| // Has a lower bound or an upper bound been chosen yet? |
| // --------------------------------------------------------------------- |
| switch (boundType) |
| { |
| case BOUND_TYPE_LOWER: |
| { |
| if (isBeginKeyChosen()) |
| { |
| // ----------------------------------------------------------- |
| // In the future, the key builder will accumulate all the |
| // candidate lower bounds and finally supply a lower bound |
| // that is expressed as LargestOf(lbv1, lbv2, ..., lbvn). |
| // Until then, use any one key predicate for the lower bound. |
| // ----------------------------------------------------------- |
| |
| // if a predicate has been marked as a preference for subset |
| // begin/end key use that predicate to define the begin/end key |
| if(rangePred && |
| rangePred->isPreferredForSubsetScanKey() && |
| getBeginKeyBoundType() != BOUND_TYPE_EQUAL) |
| { |
| // If a predicate has been marked as a preference for subset |
| // begin/end key use the bounds implied by that predicate. |
| // ----------------------------------------------------------- |
| // Establish a new lower bound. |
| // ----------------------------------------------------------- |
| setBeginKeyBoundType(boundType); |
| setBeginKeyPredicate(predId); |
| setBeginKeyValue(referencedInput); |
| beginExclusionExpr_ = intervalExclusionExpr; |
| beginValueIsExclusive_ = isExclusive; |
| } |
| } |
| else |
| { |
| // ----------------------------------------------------------- |
| // Establish a new lower bound. |
| // ----------------------------------------------------------- |
| setBeginKeyBoundType(boundType); |
| setBeginKeyPredicate(predId); |
| setBeginKeyValue(referencedInput); |
| beginExclusionExpr_ = intervalExclusionExpr; |
| beginValueIsExclusive_ = isExclusive; |
| } |
| break; |
| } |
| case BOUND_TYPE_EQUAL: |
| { |
| // --------------------------------------------------------------- |
| // If a bound has not been chosen so far, or a bound has been |
| // chosen but it defines a range, establish an equality bound |
| // for the search using the given predicate. |
| // --------------------------------------------------------------- |
| if (getEndKeyBoundType() != BOUND_TYPE_EQUAL) |
| { |
| // ----------------------------------------------------------- |
| // Establish the same lower and upper bound values. |
| // ----------------------------------------------------------- |
| setBeginKeyBoundType(boundType); |
| setBeginKeyPredicate(predId); |
| setBeginKeyValue(referencedInput); |
| beginValueIsExclusive_ = isExclusive; |
| |
| setEndKeyBoundType(boundType); |
| setEndKeyPredicate(predId); |
| setEndKeyValue(referencedInput); |
| endValueIsExclusive_ = isExclusive; |
| } |
| else |
| CMPASSERT(getBeginKeyBoundType() == BOUND_TYPE_EQUAL); |
| break; |
| } |
| case BOUND_TYPE_UPPER: |
| { |
| if (isEndKeyChosen()) |
| { |
| // ----------------------------------------------------------- |
| // In the future, the key builder will accumulate all the |
| // candidate upper bounds and finally supply an upper bound |
| // that is expressed as SmallestOf(ubv1, ubv2, ..., ubvn). |
| // Until then, use any one key predicate for the upper bound. |
| // ----------------------------------------------------------- |
| |
| // if a predicate has been marked as a preference for subset |
| // begin/end key use that predicate to define the begin/end key |
| if(rangePred && |
| rangePred->isPreferredForSubsetScanKey() && |
| getEndKeyBoundType() != BOUND_TYPE_EQUAL) |
| { |
| // If a predicate has been marked as a preference for subset |
| // begin/end key use the bounds implied by that predicate. |
| // ----------------------------------------------------------- |
| // Establish a new lower bound. |
| // ----------------------------------------------------------- |
| setEndKeyBoundType(boundType); |
| setEndKeyPredicate(predId); |
| setEndKeyValue(referencedInput); |
| endExclusionExpr_ = intervalExclusionExpr; |
| endValueIsExclusive_ = isExclusive; |
| } |
| } |
| else |
| { |
| // ----------------------------------------------------------- |
| // Establish a new upper bound. |
| // ----------------------------------------------------------- |
| setEndKeyBoundType(boundType); |
| setEndKeyPredicate(predId); |
| setEndKeyValue(referencedInput); |
| endExclusionExpr_ = intervalExclusionExpr; |
| endValueIsExclusive_ = isExclusive; |
| } |
| break; |
| } |
| default: |
| // internal error case |
| break; |
| } // end switch |
| |
| } // SearchKeyBounds::applyABoundForTheSearch |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyBounds::getBoundType() |
| // Depending on the type of comparison that is performed by the |
| // predicate, decide whether it will provide a lower bound, |
| // an upper bound or both. |
| // ----------------------------------------------------------------------- |
| SearchKeyBounds::BoundType SearchKeyBounds::getBoundType( |
| const ValueId & predId, |
| const ValueId & referencedInput) const |
| { |
| ItemExpr *ie = predId.getItemExpr(); |
| OperatorTypeEnum otype = ie->getOperatorType(); |
| |
| // do we want to invert the bound type? |
| |
| // invert it if this is a reverse scan |
| NABoolean invertBoundType = NOT isForwardSearch(); |
| |
| // also invert it if the index is in descending order |
| if (NOT isAscendingOrder()) |
| invertBoundType = NOT invertBoundType; |
| |
| // also invert it if the expression is "expr op col" intead of "col op expr" |
| if (ie->getArity() > 1 AND ie->child(1)->getValueId() != referencedInput) |
| invertBoundType = NOT invertBoundType; |
| |
| switch (otype) |
| { |
| case ITM_GREATER_EQ: |
| case ITM_GREATER: |
| case ITM_GREATER_OR_GE: |
| if (invertBoundType) |
| return BOUND_TYPE_UPPER; |
| else |
| return BOUND_TYPE_LOWER; |
| |
| case ITM_VEG_PREDICATE: |
| case ITM_IS_NULL: |
| case ITM_EQUAL: |
| case ITM_ASSIGN: |
| return BOUND_TYPE_EQUAL; |
| |
| case ITM_LESS_EQ: |
| case ITM_LESS: |
| case ITM_LESS_OR_LE: |
| if (invertBoundType) |
| return BOUND_TYPE_LOWER; |
| else |
| return BOUND_TYPE_UPPER; |
| |
| default: |
| // internal error |
| return BOUND_TYPE_NOT_SET; |
| } |
| } // SearchKeyBounds::getBoundType() |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyBounds::isBoundExclusive() |
| // ----------------------------------------------------------------------- |
| NABoolean SearchKeyBounds::isBoundExclusive(const ValueId & predId) |
| { |
| switch (predId.getItemExpr()->getOperatorType()) |
| { |
| case ITM_GREATER: |
| case ITM_LESS: |
| { |
| // only > and < create exclusive bounds that are known at compile time, |
| // except for Bignum typed pred. Because of potential truncation by |
| // the division operation on key value for a key column, we need to |
| // evaluate the predicate as execute predicate. So we return FALSE here t |
| // include the row positioned by the index to be evaluated by the |
| // execute predicate. See SQ2535. |
| |
| ItemExpr * keyExpr = (predId.getValueDesc())->getItemExpr(); |
| ItemExpr * keycol = ((*keyExpr)[0])->castToItemExpr(); |
| |
| const NAType * type = &(keycol->getValueId().getType()); |
| |
| if ( type->getTypeQualifier() == NA_NUMERIC_TYPE && |
| ((const NumericType *)type)->isBigNum() |
| ) |
| return FALSE; |
| else |
| return TRUE; |
| } |
| |
| default: |
| break; |
| } |
| return FALSE; |
| } |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyBounds::computeBeginKeyAndEndKeyValues() |
| // ----------------------------------------------------------------------- |
| void SearchKeyBounds::computeBeginKeyAndEndKeyValues |
| (ValueIdSet & setOfKeyPredicates, |
| ValueIdSet & setOfNonKeyPredicates, |
| NABoolean & previousPredsAreEqualPreds, |
| NABoolean & beginKeyIsExclusive, |
| NABoolean & endKeyIsExclusive, |
| ValueId & beginKeyExclusionExpr, |
| ValueId & endKeyExclusionExpr, |
| NABoolean & beginKeyMissing, |
| NABoolean & endKeyMissing, |
| NABoolean &allChosenPredsAreEqualPreds) |
| { |
| // --------------------------------------------------------------------- |
| // If a bound is already chosen, replace a > comparison with a >= and |
| // a < comparison with a <=. |
| // If a bound is missing, then get the lowest or highest value that |
| // is representable for the datatype of the key column. |
| // --------------------------------------------------------------------- |
| computeBeginKeyValue(beginKeyIsExclusive); |
| computeEndKeyValue(endKeyIsExclusive); |
| |
| // --------------------------------------------------------------------- |
| // Add the ValueIds of the chosen key predicates to the given set, |
| // if a begin/end key predicate has been chosen. |
| // --------------------------------------------------------------------- |
| beginKeyMissing = TRUE; |
| endKeyMissing = TRUE; |
| if (isBeginKeyPredicateChosen()) |
| { |
| setOfKeyPredicates += getBeginKeyPredicate(); |
| beginKeyMissing = FALSE; |
| |
| if (getBeginKeyBoundType() != BOUND_TYPE_EQUAL) |
| allChosenPredsAreEqualPreds = FALSE; |
| } |
| if (isEndKeyPredicateChosen()) |
| { |
| setOfKeyPredicates += getEndKeyPredicate(); |
| endKeyMissing = FALSE; |
| |
| if (getEndKeyBoundType() != BOUND_TYPE_EQUAL) |
| allChosenPredsAreEqualPreds = FALSE; |
| } |
| |
| // --------------------------------------------------------------------- |
| // Replicate key predicates on a column that is nullable as a selection |
| // expression. Also, replicate key predicates when its predecessor |
| // is not a string of equalities. |
| // --------------------------------------------------------------------- |
| if ( (getKeyColumnId().getType().supportsSQLnull()) OR //## SQLnullLogical()? |
| NOT previousPredsAreEqualPreds ) |
| { |
| if (isBeginKeyPredicateChosen()) |
| setOfNonKeyPredicates += getBeginKeyPredicate(); |
| if (isEndKeyPredicateChosen()) |
| setOfNonKeyPredicates += getEndKeyPredicate(); |
| } |
| |
| // --------------------------------------------------------------------- |
| // Assign the bound type for the current key to influence the |
| // replication of key predicates for the succeeding key column. |
| // --------------------------------------------------------------------- |
| if (previousPredsAreEqualPreds AND |
| getBeginKeyBoundType() != BOUND_TYPE_EQUAL) |
| { |
| // Note that either both begin and end bound types are equal |
| // or both are something else. |
| // This marks the place where we can no longer use key predicates |
| // exclusively, from now on we need to evaluate them as executor |
| // predicates as well (until GEM comes along) |
| previousPredsAreEqualPreds = FALSE; |
| beginKeyIsExclusive = isBeginValueExclusive(); |
| endKeyIsExclusive = isEndValueExclusive(); |
| beginKeyExclusionExpr = getBeginExclusionExpr(); |
| endKeyExclusionExpr = getEndExclusionExpr(); |
| } |
| |
| // If previousPredsAreEqualPreds is FALSE, and the begin or end key |
| // value is exclusive, one could consider adjusting the key value |
| // to make the range inclusive. This is optional, since the predicates |
| // are duplicated now as executor predicates, but it could help eliminate |
| // extra rows that would otherwise be read from the index. |
| |
| } // SearchKeyBounds::computeBeginKeyAndEndKeyValues() |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyBounds::computeBeginKeyValue() |
| // |
| // Index order: ascending |
| // forward search |
| // begin key --------------> end key |
| // X-----------------------------------------| |
| // min value max value |
| // |
| // reverse search |
| // end key <-------------- begin key |
| // |-----------------------------------------$ |
| // min value max value |
| // |
| // Index order: descending |
| // forward search |
| // begin key --------------> end key |
| // $-----------------------------------------| |
| // max value min value |
| // reverse search |
| // end key <-------------- begin key |
| // |-----------------------------------------X |
| // max value min value |
| // |
| // If the previous bound was an exclusive interval, then we want to |
| // reverse the min/max value, in order to exclude all values for |
| // this key column. Example for a key (a,b) with a key predicate a >= 5 |
| // we produce a begin key (5,-infinity), while for a key predicate a > 5 |
| // we would produce an (exclusive) begin key (5,+infinity). |
| // ----------------------------------------------------------------------- |
| void SearchKeyBounds::computeBeginKeyValue(NABoolean prevBoundWasExclusive) |
| { |
| if (NOT isBeginKeyValueChosen()) |
| { // case: missing key predicate |
| // Index order : ASC direction of search : ASC |
| // OR Index order : DESC direction of search : DESC |
| if ( (isAscendingOrder() AND isForwardSearch()) OR |
| ((NOT isAscendingOrder()) AND (NOT isForwardSearch())) ) |
| { |
| // Get the minimum value (Cases marked by the X above) |
| beginKeyValue_ = computeMissingKeyValue(NOT prevBoundWasExclusive); |
| } |
| // Index order : ASC direction of search : DESC |
| // OR Index order : DESC direction of search : ASC |
| else if ( (isAscendingOrder() AND (NOT isForwardSearch())) OR |
| ((NOT isAscendingOrder()) AND isForwardSearch()) ) |
| { |
| // Get the maximum value (Cases marked by the $ above) |
| beginKeyValue_ = computeMissingKeyValue(prevBoundWasExclusive); |
| } |
| setBeginKeyBoundType(BOUND_TYPE_LOWER); |
| } // case: missing key predicate |
| |
| } // SearchKeyBounds::computeBeginKeyValue() |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyBounds::computeEndKeyValue() |
| // |
| // Index order: ascending |
| // forward search |
| // begin key --------------> end key |
| // |-----------------------------------------$ |
| // min value max value |
| // |
| // reverse search |
| // end key <-------------- begin key |
| // X-----------------------------------------| |
| // min value max value |
| // |
| // Index order: descending |
| // forward search |
| // begin key --------------> end key |
| // |-----------------------------------------$ |
| // max value min value |
| // reverse search |
| // end key <-------------- begin key |
| // X-----------------------------------------| |
| // max value min value |
| // |
| // If the previous bound was an exclusive interval, then we want to |
| // reverse the min/max value, in order to exclude all values for |
| // this key column. Example for a key (a,b) with a key predicate a <= 5 |
| // we produce an end key (5,+infinity), while for a key predicate a < 5 |
| // we would produce an (exclusive) end key (5,-infinity). |
| // ----------------------------------------------------------------------- |
| void SearchKeyBounds::computeEndKeyValue(NABoolean prevBoundWasExclusive) |
| { |
| if (NOT isEndKeyValueChosen()) |
| { // case: missing key predicate |
| // Index order : ASC direction of search : ASC |
| // OR Index order : DESC direction of search : DESC |
| if ( (isAscendingOrder() AND isForwardSearch()) OR |
| ((NOT isAscendingOrder()) AND (NOT isForwardSearch())) ) |
| { |
| // Get the maximum value (Cases marked by the $ above) |
| endKeyValue_ = computeMissingKeyValue(prevBoundWasExclusive); |
| } |
| // Index order : ASC direction of search : DESC |
| // OR Index order : DESC direction of search : ASC |
| else if ( (isAscendingOrder() AND (NOT isForwardSearch())) OR |
| ((NOT isAscendingOrder()) AND isForwardSearch()) ) |
| { |
| // Get the minimum value (Cases marked by the X above) |
| endKeyValue_ = computeMissingKeyValue(NOT prevBoundWasExclusive); |
| } |
| setEndKeyBoundType(BOUND_TYPE_UPPER); |
| } // case: missing key predicate |
| |
| } // SearchKeyBounds::computeEndKeyValue() |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyBounds::computeMissingKeyValue() |
| // ----------------------------------------------------------------------- |
| ValueId SearchKeyBounds::computeMissingKeyValue(NABoolean wantMinValue) |
| { |
| const_cast<SearchKey&>(searchKey_)._countTimesBoundaryValReq += 1; |
| const NAType & type = getKeyColumnId().getItemExpr()->getValueId().getType(); |
| NAColumn *column = getKeyColumnId().getItemExpr()->getValueId().getNAColumn(); |
| ItemExpr *minMaxConstant; |
| |
| minMaxConstant = |
| new (CmpCommon::statementHeap()) |
| SystemLiteral(type.newCopy(CmpCommon::statementHeap()), |
| wantMinValue, |
| type.supportsSQLnull()); |
| |
| minMaxConstant->synthTypeAndValueId(); |
| |
| return minMaxConstant->getValueId(); |
| } // SearchKeyBounds::computeMissingKeyValue() |
| |
| |
| // ----------------------------------------------------------------------- |
| // Method for allocating the work space. |
| // ----------------------------------------------------------------------- |
| void SearchKeyBounds::print(FILE* ofd, const char* indent, const char* title) const |
| { |
| BUMP_INDENT(indent); |
| fprintf(ofd,"%s %s\n%s",NEW_INDENT,title,NEW_INDENT); |
| if (isAscendingOrder()) |
| fprintf(ofd,"ascending , "); |
| else |
| fprintf(ofd,"descending , "); |
| if (isForwardSearch()) |
| fprintf(ofd,"forward, "); |
| else |
| fprintf(ofd,"reverse, "); |
| NAString result; |
| beginKeyValue_.getItemExpr()->unparse(result); |
| switch (getBeginKeyBoundType()) |
| { |
| case BOUND_TYPE_LOWER: |
| fprintf(ofd,"lower bound(%s), ",result.data()); |
| break; |
| case BOUND_TYPE_EQUAL: |
| fprintf(ofd,"equal bound(%s), ", result.data()); |
| break; |
| case BOUND_TYPE_UPPER: |
| fprintf(ofd,"upper bound(%s), ", result.data()); |
| break; |
| default: |
| fprintf(ofd,"* Bound Not Set *, "); |
| break; |
| } |
| result.remove(0); // clear from position 0 to strlen |
| endKeyValue_.getItemExpr()->unparse(result); |
| switch (getEndKeyBoundType()) |
| { |
| case BOUND_TYPE_LOWER: |
| fprintf(ofd,"lower bound(%s), ",result.data()); |
| break; |
| case BOUND_TYPE_EQUAL: |
| fprintf(ofd,"equal bound(%s), ", result.data()); |
| break; |
| case BOUND_TYPE_UPPER: |
| fprintf(ofd,"upper bound(%s), ", result.data()); |
| break; |
| default: |
| fprintf(ofd,"* Bound Not Set *, "); |
| break; |
| } |
| } // SearchKeyBounds::print() |
| |
| // To be called from the debugger. |
| void SearchKeyBounds::display() const |
| { |
| SearchKeyBounds::print(); |
| } // SearchKeyBounds::display() |
| |
| // *********************************************************************** |
| // $$$ SearchKeyWorkSpace |
| // *********************************************************************** |
| |
| // ----------------------------------------------------------------------- |
| // Method for allocating the work space. |
| // ----------------------------------------------------------------------- |
| SearchKeyWorkSpace::SearchKeyWorkSpace(const ValueIdList & keyColumns, |
| const ValueIdList & indexOrder, |
| const NABoolean forwardSearch, |
| const SearchKey& searchKey) |
| : keyCount_(keyColumns.entries()), |
| keyBounds_(CmpCommon::statementHeap()), |
| searchKey_(searchKey) |
| { |
| // Initialize each SearchKeyBounds with the ValueId for the key |
| // column that it represents. |
| NABoolean ascDescFlag; |
| for (CollIndex iter = 0; iter < keyColumns.entries(); iter++) |
| { |
| if (indexOrder.entries() >= iter AND |
| indexOrder[iter].getItemExpr()->getOperatorType() == ITM_INVERSE) |
| ascDescFlag = FALSE; |
| else |
| ascDescFlag = TRUE; |
| keyBounds_.insertAt(iter, new(CmpCommon::statementHeap()) |
| SearchKeyBounds( |
| keyColumns[iter], |
| ascDescFlag, |
| forwardSearch, |
| getSearchKey()) |
| ); |
| } |
| } // SearchKeyWorkSpace::SearchKeyWorkSpace() |
| |
| // ----------------------------------------------------------------------- |
| // Method for deallocating the work space. |
| // ----------------------------------------------------------------------- |
| SearchKeyWorkSpace::~SearchKeyWorkSpace() |
| { |
| for (CollIndex iter = 0; iter < keyBounds_.entries(); iter++) |
| { |
| delete keyBounds_[iter]; |
| } |
| } // SearchKeyWorkSpace::~SearchKeyWorkSpace() |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyWorkSpace::analyzeSearchPredicates() |
| // |
| // Parameters: |
| // |
| // const ValueIdSet & setOfPredicates |
| // IN : The set of predicates that are to be analyzed for |
| // determining whether some of them can be used for |
| // forming the search key. |
| // |
| // const ValueIdSet & externalInputs |
| // IN : A read-only reference to the external inputs that are |
| // available to the operator that will be used for accessing |
| // data from the access path. |
| // |
| // ----------------------------------------------------------------------- |
| void SearchKeyWorkSpace::analyzeSearchPredicates |
| (const ValueIdSet & setOfPredicates, |
| const ValueIdSet & externalInputs |
| ) |
| { |
| for (CollIndex keyNum = 0; keyNum < (CollIndex)keyCount_; keyNum++) |
| { |
| // Set the value contained in the key predicate (if any) |
| // for this key column (i.e. bound): |
| keyBounds_[keyNum]->analyzeSearchPredicates |
| (setOfPredicates,externalInputs); |
| |
| // If this predicate is a > or < (i.e. exclusive) |
| // then do not try to set the bound for later columns |
| // because this may give the wrong answer. |
| // For instance, the table foo with columns |
| // A, B, C, where A, B are key columns has the following |
| // values: |
| // A B C |
| // 0 0 0 |
| // 1 10 100 |
| // 1 20 200 |
| // Assume we have the query |
| // select * from foo where A < 1 and B < 20; |
| // If we don't break after setting the bound for A: |
| // Because the low and high keys are like: |
| // LOW = (MIN,MIN) |
| // MAX = (1,20), exclusive |
| // Then we would get the first two rows back which is |
| // wrong. |
| // If instead we skip setting the bound for B because |
| // the bound for A is exclusive, then we get: |
| // LOW = (MIN,MIN) |
| // MAX = (1,MIN), exclusive |
| // which returns the correct answer (this is genesis case |
| // 10-980519-1753, |
| // The actual bound values for unset bounds are computed by |
| // method computeBeginKeyAndEndKeyValues. |
| |
| if(keyBounds_[keyNum]->isBeginValueExclusive() || |
| keyBounds_[keyNum]->isEndValueExclusive()) |
| { |
| break; |
| } |
| } // loop over index key columns |
| } // SearchKeyWorkSpace::analyzeSearchPredicate() |
| |
| |
| // ----------------------------------------------------------------------- |
| // SearchKeyWorkSpace::computeBeginAndEndKeyValues() |
| // |
| // Parameters: |
| // |
| // ValueIdList & lowerBoundValues |
| // OUT: Each element of this list contains the ValueId of a |
| // scalar expression that specifies a lower bound value. |
| // |
| // ValueIdList & upperBoundValues |
| // OUT: Each element of this list contains the ValueId of a |
| // scalar expression that specifies an upper bound value. |
| // |
| // ValueIdSet & setOfKeyPredicates |
| // OUT: The set of predicates that are used building the key. |
| // |
| // ValueIdSet & setOfNonKeyPredicates |
| // OUT: The set of predicates that must be evaluated as |
| // selection expressions on rows that are qualified |
| // by the index keys. |
| // |
| // ----------------------------------------------------------------------- |
| void SearchKeyWorkSpace::computeBeginKeyAndEndKeyValues |
| (const ValueIdList & keyColumns, |
| const ValueIdSet& externalInputs, |
| ValueIdList & lowerBoundValues, |
| ValueIdList & upperBoundValues, |
| ValueIdSet & setOfKeyPredicates, |
| ValueIdSet & setOfNonKeyPredicates, |
| NABoolean & keyPredicatesUnique, |
| NABoolean & beginKeyIsExclusive, |
| NABoolean & endKeyIsExclusive, |
| ValueId & beginKeyExclusionExpr, |
| ValueId & endKeyExclusionExpr, |
| NABoolean &allBeginKeysMissing, |
| NABoolean &allEndKeysMissing, |
| NABoolean &allChosenPredsAreEqualPreds) |
| { |
| // initialize variables for an equal predicate, the first non-equal |
| // key predicate that comes along (if any) may change the variables |
| keyPredicatesUnique = TRUE; |
| beginKeyIsExclusive = FALSE; |
| endKeyIsExclusive = FALSE; |
| beginKeyExclusionExpr = NULL_VALUE_ID; |
| endKeyExclusionExpr = NULL_VALUE_ID; |
| allBeginKeysMissing = TRUE; |
| allEndKeysMissing = TRUE; |
| allChosenPredsAreEqualPreds = TRUE; |
| |
| for (CollIndex keyNum = 0; keyNum < (CollIndex)keyCount_; keyNum++) |
| { |
| NABoolean beginKeyMissing = FALSE; |
| NABoolean endKeyMissing = FALSE; |
| // iterate over the key column list |
| // In each iteration we pickup the predicate on the key column and use it |
| // to add to the upper or the lower bound of the single subset scan |
| // The predicates to be used for each key column have already been determined. |
| // Consider for example a table with key columns a,b,c. |
| // Lets say we have predicates a = 1 and b = 2 and c = 3 |
| // The loop below will iterate over each column in clustering order, i.e. a, b, c |
| // for each column we'll determine the upper bound and lower bound |
| // For predicate a = 1 and b = 2 and c = 3 upper and lower bounds both will be |
| // (1, 2, 3). |
| // For predicate a > 1 and a < 10, the lower bound will be (1, min, min) and the |
| // upper bound will be (10, max, max). |
| // For predicate a = 1 and b < 2, the lower bound will be (1,2, min) and the |
| // upper bound will be (1, max, max) |
| // |
| // This code cannot directly handle MCRP (MultiColumnRangePredicates) i.e. |
| // predicates of the form (a, b, c) > (1, 2, 3). Since this predicate is infact |
| // a set of disjuncts and cannot be seperated out into conjuncts on each key column |
| // To fix this we transform (a, b, c) > (1, 2, 3) to (a >= 1) and ((a, b, c) > (1, 2, 3)) |
| // The predicate (a >= 1) can be used to give a lower bound of (1, min, min). |
| // Ofcourse predicate (a, b, c) > (1, 2, 3) implies a lower bound of (1, 2, 3). |
| // To get the complete lower bound implied by (a, b, c) > (1, 2, 3), the added predicate |
| // (a >= 1) is marked as being derived from (a, b, c) > (1, 2, 3). If a marked predicate |
| // is used to define begin/end keys we'll also fill in the added info from the parent |
| // predicate (i.e. in this case (a, b, c) > (1, 2, 3)) to get a complete lower/upper bound. |
| // So if predicate (a >= 1) is choosen, we'll try to see if it is derived from a MCRP, |
| // if so we'll fill in extra info to get a complete lower/upper bound, in our case instead |
| // of a lower bound of (1, min, min) we'll get a lower bound of (1, 2, 3) |
| // Also remember given key columns a, b, c, we only get key predicates upto the first |
| // range predicate |
| // Example 1: a = 1 and b > 2 and c < 3 |
| // For the predicate set above, we'll only use a = 1 and b > 2 as key predicates |
| // c < 3 will not be considered a key predicate |
| // Example 2: a = 1 and b = 2 and < 3 |
| // For the predicate set above, we use all the predicates as key predicates |
| SearchKeyBounds * keyBound = keyBounds_[keyNum]; |
| keyBound->computeBeginKeyAndEndKeyValues |
| (setOfKeyPredicates, |
| setOfNonKeyPredicates, |
| keyPredicatesUnique, |
| beginKeyIsExclusive, |
| endKeyIsExclusive, |
| beginKeyExclusionExpr, |
| endKeyExclusionExpr, |
| beginKeyMissing, |
| endKeyMissing, |
| allChosenPredsAreEqualPreds); |
| |
| // if the number of entries in the lower bound is less than the |
| // position of the key column we are currently iterating over. |
| // This basically makes sure that no other value has taken place |
| // of this value |
| if(lowerBoundValues.entries() < (keyNum+1)) |
| lowerBoundValues.insertAt(keyNum,keyBound->getBeginKeyValue()); |
| |
| // if there is a predicate that we used to derive this value for |
| // the lower bound, then check if that predicate was added by a |
| // MCRP and if so, try to get a better key coverage (i.e. more |
| // values in the lower bound) |
| // If there is no predicate, then we get a lower bound of min |
| if(keyBound->getBeginKeyPredicate() != NULL_VALUE_ID) |
| handleMultiColumnRangePred(TRUE, keyNum, lowerBoundValues); |
| |
| // if the number of entries in the upper bound is less than the |
| // position of the key column we are currently iterating over. |
| // This basically makes sure that no other value has taken place |
| // of this value |
| if(upperBoundValues.entries() < (keyNum+1)) |
| upperBoundValues.insertAt(keyNum,keyBound->getEndKeyValue()); |
| |
| // if there is a predicate that we used to derive this value for |
| // the upper bound, then check if that predicate was added by a |
| // MCRP and if so, try to get a better key coverage (i.e. more |
| // values in the upper bound) |
| // If there is no predicate, then we get a upper bound of max |
| if(keyBound->getEndKeyPredicate() != NULL_VALUE_ID) |
| handleMultiColumnRangePred(FALSE, keyNum, upperBoundValues); |
| |
| if (NOT beginKeyMissing) |
| allBeginKeysMissing = FALSE; |
| if (NOT endKeyMissing) |
| allEndKeysMissing = FALSE; |
| |
| } // loop over index key columns |
| |
| // Analyze those VEGPredicates that appear in the setOfKeyPredicates and |
| // replicate them as non-key predicates when they contain non-key columns. |
| getSearchKey().replicateNonKeyVEGPredicates(setOfKeyPredicates, |
| setOfNonKeyPredicates); |
| |
| if ((allChosenPredsAreEqualPreds) && |
| (allBeginKeysMissing) && |
| (allEndKeysMissing)) |
| allChosenPredsAreEqualPreds = FALSE; |
| |
| } // SearchKeyWorkSpace::computeBeginKeyAndEndKeyValues() |
| |
| NABoolean SearchKey::isNarrowNeeded() const |
| { |
| for (CollIndex i=0; i<beginKeyValues_.entries(); i++) |
| { |
| |
| const ValueId &keyColVID = getKeyColumns()[i]; |
| const ValueId &keyValVID = beginKeyValues_[i]; |
| |
| if (keyValVID.getType().errorsCanOccur(keyColVID.getType())) |
| return TRUE; |
| } |
| |
| for (CollIndex i=0; i<endKeyValues_.entries(); i++) |
| { |
| |
| const ValueId &keyColVID = getKeyColumns()[i]; |
| const ValueId &keyValVID = endKeyValues_[i]; |
| |
| if (keyValVID.getType().errorsCanOccur(keyColVID.getType())) |
| return TRUE; |
| } |
| |
| |
| return FALSE; |
| } |
| |
| NABoolean SearchKey::areAllKeysConstants(NABoolean convCheck) const |
| { |
| // loop over begin/end key values of all key columns |
| for (CollIndex i=0; i<beginKeyValues_.entries(); i++) |
| for (int k=0; k<=1; k++) |
| { |
| ItemExpr *key = (k==0 ? beginKeyValues_[i].getItemExpr() |
| : endKeyValues_[i].getItemExpr()); |
| OperatorTypeEnum oper = key->getOperatorType(); |
| ItemExpr *constVal = NULL; |
| |
| // For now we allow VEGRefs with constants or constants themselves |
| // (could extend to const params, host vars, char inputs, etc. later) |
| switch (oper) |
| { |
| case ITM_VEG_REFERENCE: |
| { |
| // do not pass 'TRUE' until run-time can properly replace constants |
| // in hbase search key with actual replacements when the query plan |
| // is reused. |
| ValueId v = |
| (static_cast<VEGReference *>(key))->getVEG()->getAConstant(/*TRUE*/); |
| if (v == NULL_VALUE_ID) |
| return FALSE; |
| else |
| constVal = v.getItemExpr(); |
| } |
| break; |
| |
| case ITM_CONSTANT: |
| constVal = key; |
| break; |
| |
| default: |
| // detected non-constants, bail out |
| return FALSE; |
| } |
| |
| if (convCheck) |
| { |
| // check if a conversion error could occur during key building |
| const NAType &srcType = constVal->getValueId().getType(); |
| const NAType &tgtType = getKeyColumns()[i].getType(); |
| |
| if (srcType.errorsCanOccur(tgtType, FALSE)) |
| return FALSE; |
| } |
| } // for every key column and for every begin/end key |
| return TRUE; |
| } |
| |
| //------------------------------------------ |
| // New methods to support histogram costing |
| //----------------------------------------- |
| CollIndex SearchKey::getKeyDisjunctEntries() const |
| { |
| CMPASSERT(getDisjuncts().entries() > 0); |
| return 1; // there is a single "disjunct" in a search key |
| // (the common disjuncts) |
| } |
| |
| void SearchKey::getKeyPredicatesByColumn(ColumnOrderList& keyPredsByCol, |
| CollIndex disjunctNumber) const |
| { |
| |
| // Remove all predicates in all columns: |
| keyPredsByCol.clearPredicates(); |
| |
| CMPASSERT(disjunctNumber == 0); // only a single disjunct in this case! |
| |
| // PRECONDITION: All columns must be valid (The ScanKey guarantees |
| // this) |
| |
| ValueIdSet commonPreds(getDisjuncts().getCommonPredicates()); |
| |
| // fill up the keyPredsByCol with key predicates from the common preds: |
| getKeyColumnOrderList(keyPredsByCol, /* out */ |
| commonPreds |
| ); |
| |
| // We need expressions like this: |
| // [=]^*[RANGE] |
| // i.e. A=3, B=4, C=:hv1, D>10 AND D < :hv2 |
| |
| // Find the column position (i.e. order) of last column of the single subset: |
| NABoolean moreColumnPositions = FALSE; |
| CollIndex order = 0; |
| for (; order < keyPredsByCol.entries(); order++) |
| { |
| // all columns are valid at this point: |
| CMPASSERT(keyPredsByCol.getPredicateExpressionPtr(order) != NULL); |
| |
| // resolve conflicts if necessary |
| if (keyPredsByCol.getPredicateExpressionPtr(order)->getType() == |
| ColumnOrderList::KeyColumn::CONFLICT |
| OR |
| keyPredsByCol.getPredicateExpressionPtr(order)->getType() == |
| ColumnOrderList::KeyColumn::CONFLICT_EQUALS) |
| { |
| // pick one of the predicates |
| keyPredsByCol.resolveConflict(order); |
| } |
| |
| // Exit the loop as soon as a predicate that it is not |
| // an (EQUAL or a CONFLICT_EQUALS) is reached |
| if (keyPredsByCol.getPredicateExpressionPtr(order)->getType() != |
| ColumnOrderList::KeyColumn::EQUAL |
| AND |
| keyPredsByCol.getPredicateExpressionPtr(order)->getType() != |
| ColumnOrderList::KeyColumn::CONFLICT_EQUALS ) |
| { // i.e A > 4 |
| moreColumnPositions = TRUE; |
| break; // but this predicate is not an equal |
| } |
| } // for |
| |
| // If the loop stoped earlier because a NON-EQUAL pred was |
| // encountered, check whether the next pred is a RANGE. |
| // If it is, it must be added to the single subset: |
| if (moreColumnPositions) |
| { |
| |
| // The column position (i.e. order) was advanced one after the last |
| // EQUALS (or it is 0 if there was no EQUALS), |
| // If the expression corresponding to this order is a range |
| // then advance it once more: |
| if (keyPredsByCol.getPredicateExpressionPtr(order)->getType() |
| == ColumnOrderList::KeyColumn::RANGE) |
| { // A=10 AND B > 10 and B < :hv |
| order++; // The expression is a range |
| } |
| } |
| |
| // (order-1) represents the column position |
| // of stop column. If order = 0, then there are no |
| // key predicates satisfying the single subset condition, |
| // thus, invalidate all columns: |
| if (order == 0) |
| { |
| // No key predicates in the single subset: |
| // all columns must be marked as |
| // empty: |
| keyPredsByCol.invalidateAllColumns(); |
| } |
| else |
| { |
| // There are some key predicates, the stop column |
| // is one less than the order: |
| keyPredsByCol.setStopColumn(--order); |
| } |
| |
| } // SearchKey::getKeyPredicatesByColumn(...) |
| |
| //for SearchKey allKeyPredicates and disjunctNumber are |
| //insignificant, but the signature of the function had to |
| //be changed because the signature was also changed in |
| //the parent class ScanKey |
| void SearchKey::getKeyPredicates(ValueIdSet &keyPredicates, /* out */ |
| NABoolean * allKeyPredicates, |
| CollIndex disjunctNumber) const |
| { |
| |
| CMPASSERT(keyPredicates.isEmpty()); // to preserve the semantics |
| |
| // The SearchKey contains exactly one disjunct: |
| CMPASSERT(disjunctNumber == 0); |
| |
| ColumnOrderList keyPredsByCol(getKeyColumns()); |
| getKeyPredicatesByColumn(keyPredsByCol); |
| |
| // We want to return *all* key predicates so that the |
| // key builder of SearchKey works as usual (some |
| // preds may be apply that violate the single subset |
| // property) |
| keyPredsByCol.getAllPredicates(keyPredicates); |
| |
| |
| } // getKeyPredicates(...) |
| |
| |
| // Generate the data structures needed by the generator and |
| // rewrite VEG preds and predicates: |
| void SearchKey::preCodeGen(ValueIdSet& executorPredicates, |
| const ValueIdSet& selectionPredicates, |
| const ValueIdSet & availableValues, |
| const ValueIdSet & inputValues, |
| VEGRewritePairs * vegPairsPtr, |
| NABoolean replicateExpression, |
| NABoolean partKeyPredsAdded) |
| { |
| // ----------------------------------------------------------------------- |
| // To implement take a look at FileScan::preCodeGen and move |
| // the key generation from there to here. You may have to add a |
| // couple of fields to the SearchKey class. |
| // ----------------------------------------------------------------------- |
| CMPASSERT(FALSE); // not implemented |
| |
| } |
| |
| // Replace the hostvariables that have changed during optimization. |
| // More than one set of host variables may have been created for |
| // logical subpartitioning under different circumstances. Make |
| // sure all nodes are updated to use the right ones. |
| void SearchKey::replaceBegEndPivs(ValueIdSet & oldPivs, |
| ValueIdSet & newPivs) |
| { |
| |
| for (CollIndex i = 0; i<getBeginKeyValues().entries() ; i++) |
| { |
| const ValueId& vid1 = (getBeginKeyValues())[i]; |
| const ValueId& vid2 = (getEndKeyValues())[i]; |
| if (oldPivs.contains(vid1) OR |
| oldPivs.contains(vid2)) |
| { |
| Int32 count = 0; |
| for (ValueId id = oldPivs.init(); oldPivs.next(id); oldPivs.advance(id)) |
| { |
| if (id == vid1 OR |
| id == vid2) |
| { |
| Int32 countn = 0; |
| for (ValueId idn = newPivs.init(); newPivs.next(idn); newPivs.advance(idn)) |
| { |
| if (countn == count AND id == vid1) |
| { |
| beginKeyValues_.removeAt(i); |
| beginKeyValues_.insertAt(i,idn); |
| } |
| if (countn == count AND id == vid2) |
| { |
| endKeyValues_.removeAt(i); |
| endKeyValues_.insertAt(i,idn); |
| } |
| countn += 1; |
| } |
| |
| } |
| count += 1; |
| } |
| } |
| } |
| } |
| |
| |
| |
| /////////////////////////////////////////////////////////////////// |
| // check whether two constants are equal. |
| /////////////////////////////////////////////////////////////////// |
| static NABoolean |
| bothAreIdenticalConstValues(const ValueId& vid1, const ValueId vid2) |
| { |
| // Constant values may have different ValueId. Check their text form. |
| if ( vid1.getItemExpr()->getOperatorType() != ITM_CONSTANT || |
| vid2.getItemExpr()->getOperatorType() != ITM_CONSTANT |
| ) |
| return FALSE; |
| |
| return ( (*vid1.getItemExpr()) == (*vid2.getItemExpr()) ); |
| } |
| |
| /////////////////////////////////////////////////////////////////// |
| // Check whether two veg references form the subset relationship. |
| // If either is included in the other, then they are equal. |
| /////////////////////////////////////////////////////////////////// |
| static NABoolean |
| areSubsetVegReferences(const ValueId& vid1, const ValueId vid2) |
| { |
| if ( |
| vid1.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE && |
| vid2.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE |
| ) |
| { |
| const ValueIdSet& set1 = |
| ((VEGReference *)(vid1.getItemExpr()))->getVEG()->getAllValues(); |
| |
| if ( set1.contains(vid2) ) return TRUE; |
| |
| const ValueIdSet& set2 = |
| ((VEGReference *)(vid2.getItemExpr()))->getVEG()->getAllValues(); |
| |
| if ( set2.contains(vid1) ) return TRUE; |
| } |
| |
| return FALSE; |
| } |
| |
| /////////////////////////////////////////////////////////////////// |
| // Check whether two veg references contain a common constant |
| // expression (constant, hostvar or dynamic param). |
| // If so, return TRUE. |
| // |
| // This routine is useful in handling cases where some vids in ValueId |
| // sets referred to by vid1 and vid2 are in different VEG regions |
| // (introduced by aggregates), but the two ValueId sets have a common |
| // constant expression (constants, hostvars or dynamic params). It is |
| // the common value that assures the two ValueId sets are equal. |
| // |
| // Example (assuming t and s are co-located and co-partitioned): |
| // begin |
| // select max(..) from t where t.a = 1; |
| // select min(..) from s where s.a = 1; |
| // end |
| // |
| // Both max() and min() introduce a new VEG region and therefore |
| // t.a and s.a will not be put into a single VEG. However, constant |
| // value 1 guarantees both are accessing the same partition. |
| // |
| /////////////////////////////////////////////////////////////////// |
| static NABoolean |
| containCommonConstantExpr(const ValueId& vid1, const ValueId vid2) |
| { |
| if ( |
| vid1.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE && |
| vid2.getItemExpr()->getOperatorType() == ITM_VEG_REFERENCE |
| ) |
| { |
| const ValueIdSet& set1 = |
| ((VEGReference *)(vid1.getItemExpr()))->getVEG()->getAllValues(); |
| ValueIdSet constExpr1; |
| set1.getConstantExprs(constExpr1); |
| |
| if ( constExpr1.isEmpty() ) |
| return FALSE; |
| |
| const ValueIdSet& set2 = |
| ((VEGReference *)(vid2.getItemExpr()))->getVEG()->getAllValues(); |
| ValueIdSet constExpr2; |
| set2.getConstantExprs(constExpr2); |
| |
| if ( constExpr2.isEmpty() ) |
| return FALSE; |
| |
| constExpr1.intersectSet(constExpr2); |
| return NOT constExpr1.isEmpty(); |
| } |
| |
| return FALSE; |
| } |
| |
| NABoolean |
| areVidsIdentical(const ValueId& x, const ValueId& y) |
| { |
| return |
| ( x == y OR |
| bothAreIdenticalConstValues(x, y) OR |
| areSubsetVegReferences(x, y) OR |
| containCommonConstantExpr(x, y) |
| ) ; |
| } |
| |
| NABoolean |
| SearchKey::areKeyValuesIdentical(const SearchKey* other, CollIndex n) const |
| { |
| CMPASSERT(n>=1 && n<=getBeginKeyValues().entries()); |
| CMPASSERT(n>=1 && n<=(other->getBeginKeyValues()).entries()); |
| |
| for (CollIndex i = 0; i<n ; i++) |
| { |
| const ValueId& vid1 = (getBeginKeyValues())[i]; |
| const ValueId& vid2 = (getEndKeyValues())[i]; |
| |
| // begin/end key value component of this key should be the same. |
| if ( vid1 != vid2 ) return FALSE; |
| |
| const ValueId& vid3 = (other->getBeginKeyValues())[i]; |
| const ValueId& vid4 = (other->getEndKeyValues())[i]; |
| |
| // begin/end key value component of the other key should be the same. |
| if ( vid3 != vid4 ) return FALSE; |
| |
| // Both keys should be the same. |
| if ( areVidsIdentical(vid1, vid3) == FALSE ) |
| return FALSE; |
| } |
| |
| return TRUE; |
| } |
| |
| // Given |
| // 1. a list of values representing boundary values (parameter boundaryValues) |
| // 2. they type of boundary begin key -> lower bound, end key -> upper bound |
| // this is specified by parameter isBeginKey |
| // 3. key column location, consider clustering key a, b ,c keyNum for a = 0 |
| // keyNum for b = 1 and keyNum for c=2 |
| // |
| // Add values to list of boundary values from a MCRP (MultiColumnRangePredicate) |
| // The SearchKey is normally not able to handle MCRPs (i.e. predicates of the form |
| // (a,b) > (1,2)). To fix this, we transform (a,b) > (1,2) => (a>=1) and (a,b) > (1,2) |
| // Consider table t1( a int, b int, c int, primary key (a,b)) and the following query |
| // |
| // select * |
| // from t1 |
| // where (a,b) > (1,2) |
| // |
| // Predicate (a,b) > (1,2) => will be transformed to (a>=1) and (a,b) > (1,2) |
| // The only key predicate considered will be (a>=1), since (a,b) > (1,2) cannot |
| // be handled by the SearchKey. |
| // Predicate (a>=1) will be used to define a begin key (i.e. scan lower bound of (1,min)) |
| // Since there is a predicate (a,b) > (1,2) we should try to get a begin key of (1,2). |
| // This method is called after the begin key value has been determined for |
| // column 'a'. The parameters will look like |
| // isBeginKey = TRUE |
| // keyNum = 0 |
| // boundaryValues=[1] |
| // |
| // What we intend to do is add value 2, to the list of boundary values |
| // i.e. get a list [1,2], as mentioned above this is derived from predicate |
| // (a,b) > (1,2). The predicate (a>=1) is special because it was derived from |
| // (a,b) > (1,2). Since it is a derived predicate, we added extra information |
| // to the predicate so that now we can use that information to get a better key |
| // in our case the key we are looking to get is (1,2) |
| void SearchKeyWorkSpace::handleMultiColumnRangePred(NABoolean isBeginKey, |
| CollIndex keyNum, |
| ValueIdList & boundaryValues) |
| { |
| // get a handle to the SearchKeyBounds associated with this key column |
| SearchKeyBounds * keyBound = keyBounds_[keyNum]; |
| |
| // ValueId of predicate on this key column |
| ValueId keyPredId = NULL_VALUE_ID; |
| |
| // ValueId of the value for this key column |
| // consider predicate (a,b) > (1,2) |
| // the value associated with column 'a' is 1 |
| ValueId keyValue = NULL_VALUE_ID; |
| |
| // is this the begin key or the end key |
| if(isBeginKey) |
| { |
| // if this is the begin key, get the id of |
| // the predicate used to derive the key value |
| // also get the key value |
| keyPredId = keyBound->getBeginKeyPredicate(); |
| keyValue = keyBound->getBeginKeyValue(); |
| } |
| else{ |
| // if this is the end key, get the id of |
| // the predicate used to derive the key value |
| // also get the key value |
| keyPredId = keyBound->getEndKeyPredicate(); |
| keyValue = keyBound->getEndKeyValue(); |
| } |
| |
| // if there is a key predicate, do further analysis |
| // if there isn't a key predicate on this column (i.e. keyNum) |
| // then we can't do anything, the value will be min/max value for |
| // column's datatype |
| if ( keyPredId != NULL_VALUE_ID ) |
| { |
| // get a handle to the key predicate expression |
| ItemExpr * keyItemExpr = keyPredId.getItemExpr(); |
| |
| // is there a range predicate on the key column |
| if (keyItemExpr->isARangePredicate()) |
| { |
| BiRelat * keyPred = (BiRelat *) keyItemExpr; |
| |
| // if there is a range predicate on the |
| // key column is it derived from a MCRP |
| if (keyPred->isDerivedFromMCRP()) |
| { |
| // yes it is derived from MCRP |
| // we can potentially augment |
| // the boundaryValues list |
| |
| // the list of comparisons implied on each column by a MCRP |
| // given predicate (a, b) < (3,4) the comparison implied |
| // on each 'a' and 'b' is '<' |
| // Note this can be different if key_range_compare or |
| // the directby by clause is used. Example |
| // (a, b) < (3, 4) => (a < 3) OR (a = 3 and b < 4) |
| // if the same predicate is used with the directedby clause |
| // e.g. (a,b) < (3,4) directedby (asc,desc) the predicate |
| // will be transformed to (a < 3) OR (a = 3 and b > 4) |
| // Notice the comparison on b is now '>' instead of '<' |
| ValueIdList listOfComparisons; |
| |
| // list of left children of the MCRP |
| // given MCRP (a,b) < (3,4), this list |
| // will contain (a,b) |
| ValueIdList leftChildList; |
| |
| // list of right children of the MCRP |
| // given MCRP (a,b) < (3,4), this list |
| // will contain (3,4) |
| ValueIdList rightChildList; |
| |
| // get the MCRP info stored in the derived predicate |
| // given predicate (a,b) < (3,4), we'll add a derived |
| // predicate and transform (a,b) < (3,4) to |
| // (a <= 3) or (a,b) < (3,4), the derived predicate |
| // in this case is (a <= 3) |
| keyPred->getListOfComparisonIds(listOfComparisons); |
| keyPred->getLeftMCRPChildList(leftChildList); |
| keyPred->getRightMCRPChildList(rightChildList); |
| |
| // figure out if value is the left or the right child |
| // We need to figure out if the value of the key is on |
| // the right or the left. |
| // Consider MCRP (a,b) > (1,2), the same can be written as |
| // (1,2) < (a,b). |
| ItemExpr * leftChildItemExpr = (ItemExpr *) keyPred->child(0); |
| |
| // by default assume literals are at right i.e. predicate of |
| // the form (a,b) > (1,2) |
| NABoolean literalsAtLeft = FALSE; |
| |
| if ( leftChildItemExpr->getOperatorType() == ITM_VEG_REFERENCE) |
| { |
| // if child is a VEG |
| VEGReference * leftChildVEG = (VEGReference *) leftChildItemExpr; |
| if(leftChildVEG->getVEG()->getAllValues().contains(keyValue)) |
| { |
| // literals are at left i.e. predicate is of the form |
| // (1,2) < (a, b) |
| literalsAtLeft = TRUE; |
| } |
| } |
| else{ |
| // child is not a VEG |
| if ( keyValue == leftChildItemExpr->getValueId() ) |
| { |
| // literals are at left i.e. predicate is of the form |
| // (1,2) < (a, b) |
| literalsAtLeft = TRUE; |
| } |
| } |
| |
| // list of columns in the MCRP |
| // given predicate (a,b) > (1,2) |
| // this is (a,b) |
| ValueIdList predColList; |
| |
| // list of literals in the MCRP |
| // given predicate (a,b) > (1,2) |
| // this is (1,2) |
| ValueIdList literalList; |
| |
| if (literalsAtLeft) |
| { |
| // if literals at left then list |
| // of literals is at left |
| // i.e. predicate of the form |
| // (1,2) < (a,b) |
| predColList = rightChildList; |
| literalList = leftChildList; |
| } |
| else{ |
| // if literals at right then list |
| // of literals is a right |
| // i.e. predicate of the form |
| // (a,b) > (1,2) |
| predColList = leftChildList; |
| literalList = rightChildList; |
| } |
| |
| // the following loop compares the list of |
| // columns in the predicate to the list of |
| // key columns in the access path |
| // The ordering of the columns is also compared |
| // Consider predicate (a,b) > (1,2) |
| // and two access paths |
| // 1. with key (a,b) both in ascending order |
| // 2. with key (a,b) with 'a' ascending and |
| // 'b' descending |
| // for access path 1, we can use a begin key |
| // of (1,2). |
| // for access path 2, we can only use a begin key |
| // of (1, max), max because b is descending. |
| |
| |
| // location in list of literals/columns |
| // starting from second location (i.e. predLocation=1) |
| // because first location already matches, |
| // consider MCRP (a,b) > (1,2) |
| // by the time we get here we already have |
| // a key value of 1 for column 'a', this is |
| // because (a,b) > (1,2) => (a>=1) or (a,b) > (1,2) |
| // the predicate (a>=1) has already been used to |
| // get the key value for column a. |
| CollIndex predLocation = 1; |
| |
| for (CollIndex remainingKeyCols = keyNum + 1; |
| remainingKeyCols < (CollIndex)keyCount_; |
| remainingKeyCols++) |
| { |
| SearchKeyBounds * kBound = keyBounds_[remainingKeyCols]; |
| |
| // see if columns match |
| ValueId keyCol = kBound->getKeyColumnId(); |
| |
| // if we have iterated over all the columns in the MCRP |
| if (listOfComparisons.entries() == predLocation) |
| return; |
| |
| // get ValueId of the comparison on the column |
| ValueId pred = listOfComparisons[predLocation]; |
| |
| // get ValueId of the predicate column |
| ValueId predCol = predColList[predLocation]; |
| |
| // get ValueId of the key value associated with |
| // the predicate column |
| ValueId literal = literalList[predLocation]; |
| |
| // get ItemExpr representing predicate column |
| ItemExpr * predColExpr = predCol.getItemExpr(); |
| |
| // get ItemExpr representing associated value |
| ItemExpr * literalExpr = literal.getItemExpr(); |
| |
| ValueId keyLiteral = NULL_VALUE_ID; |
| |
| // match the clustering order of the key column |
| // with the comparison on the predicate column |
| // Begin match |
| |
| // get type of comparison on the predicate column |
| OperatorTypeEnum comparisonType = (pred.getItemExpr())->getOperatorType(); |
| |
| // get the inverse of the comparison on the predicate column |
| OperatorTypeEnum inverseComparison = (pred.getItemExpr())->getInverseOpType(); |
| |
| // this is the comparsion to check, it should be |
| // >, >= for begin keys |
| // and <, <= for end keys |
| OperatorTypeEnum comparisonToCheck = comparisonType; |
| |
| // if literals are at left reverse the comparison type |
| if(literalsAtLeft) |
| { |
| if(comparisonToCheck == comparisonType) |
| { |
| comparisonToCheck = inverseComparison; |
| } |
| else{ |
| comparisonToCheck = comparisonType; |
| } |
| } |
| |
| // if key column is descending reverse comparsion type |
| if(!kBound->isAscendingOrder()) |
| { |
| if(comparisonToCheck == comparisonType) |
| { |
| comparisonToCheck = inverseComparison; |
| } |
| else{ |
| comparisonToCheck = comparisonType; |
| } |
| } |
| |
| // if it's a reverse scan, reverse comparison type |
| if(!kBound->isForwardSearch()) |
| { |
| if(comparisonToCheck == comparisonType) |
| { |
| comparisonToCheck = inverseComparison; |
| } |
| else{ |
| comparisonToCheck = comparisonType; |
| } |
| } |
| |
| // after doing all the manipulations of the comparison type above |
| // the comparisonToCheck should be |
| // * >, >= for begin keys |
| // * <, <= for end keys |
| // Otherwise it's not a match |
| if (isBeginKey) |
| { |
| if((comparisonToCheck == ITM_LESS) || |
| (comparisonToCheck == ITM_LESS_EQ)) |
| return; |
| } |
| else{ |
| if((comparisonToCheck == ITM_GREATER) || |
| (comparisonToCheck == ITM_GREATER_EQ)) |
| return; |
| |
| } |
| // End match |
| |
| // the ordering matched, now lets check if the predicate column |
| // and the key column match |
| NABoolean predContainsKeyCol = FALSE; |
| |
| if (predColExpr->getOperatorType() == ITM_VEG_REFERENCE) |
| { |
| // if predicate Col is a VEG, check if that VEG contains |
| // the key column |
| VEGReference * predColVEG = (VEGReference *) predColExpr; |
| if(predColVEG->getVEG()->getAllValues().contains(keyCol)) |
| predContainsKeyCol = TRUE; |
| } |
| else{ |
| // predicate column is not a VEG, then it should be |
| // the key column, otherwise no match |
| if(keyCol == predCol) |
| predContainsKeyCol = TRUE; |
| } |
| |
| // if the columns matched |
| if (predContainsKeyCol) |
| { |
| //check if the literal is a constant, hostvar or parameter |
| //if not, we cannot use it for defining a key value |
| if (literalExpr->getOperatorType() == ITM_VEG_REFERENCE) |
| { |
| // if literal is a VEG, check if the VEG contains something |
| // we can use to define a key value |
| VEGReference * literalVEG = (VEGReference *) literalExpr; |
| |
| ValueId keyLiteral = literalVEG->getVEG()-> |
| getAConstantHostVarOrParameter(); |
| |
| if (keyLiteral == NULL_VALUE_ID) |
| return; |
| } |
| else{ |
| // literal is not a VEG, then it must be a constant expression |
| if(!literalExpr->doesExprEvaluateToConstant(FALSE,TRUE)) |
| return; |
| } |
| |
| // columns matched and we can use the literal to specify |
| // key value, therefore add literal to list of keyValues |
| // i.e. boundaryValues |
| boundaryValues.insertAt(remainingKeyCols, literal); |
| } |
| else{ |
| // columns did not match, return |
| return; |
| } |
| |
| // update to next location in MCRP |
| predLocation++; |
| } |
| } |
| } |
| } |
| } |
| |
| HivePartitionAndBucketKey::HivePartitionAndBucketKey( |
| const HHDFSTableStats *hdfsTableStats,// IN |
| const ValueIdList &hivePartColList, // IN |
| const ValueIdList &hiveBucketColList, // IN |
| ValueIdSet & setOfPredicates) // IN/OUT |
| : hdfsTableStats_(hdfsTableStats), |
| hivePartColList_(hivePartColList), |
| hiveBucketColList_(hiveBucketColList), |
| selectedPartitions_(&(((HHDFSTableStats *) hdfsTableStats)->listPartitionStatsList_), (CollHeap *) NULL) |
| { |
| // mark all buckets as selected |
| selectedSingleBucket_ = -1; |
| // select all partitions by default |
| selectedPartitions_.complement(); |
| } |
| |
| void HivePartitionAndBucketKey::accumulateSelectedStats( |
| HHDFSStatsBase &result) |
| { |
| for (CollIndex i=0; |
| selectedPartitions_.nextUsed(i); |
| i++) |
| { |
| HHDFSListPartitionStats *lps = selectedPartitions_.element(i); |
| |
| // if selecting a single bucket add only that, otherwise |
| // add the entire list partition |
| const HHDFSStatsBase *itemToAdd = lps; |
| |
| if (isSingleBucketSelected()) |
| itemToAdd = (*lps)[selectedSingleBucket_]; |
| if (itemToAdd) |
| result.add(itemToAdd); |
| } |
| } |
| |
| NABoolean HivePartitionAndBucketKey::getNextFile(HiveFileIterator &i) |
| { |
| while (-1) |
| { |
| switch (i.l_) |
| { |
| case 1: |
| |
| // list partition level |
| if (selectedPartitions_.nextUsed(i.p_)) |
| { |
| i.l1PartStats_ = selectedPartitions_.element(i.p_); |
| // reset bucket and file levels |
| i.l2BucketStats_ = NULL; |
| i.l3FileStats_ = NULL; |
| i.b_ = 0; |
| i.f_ = 0; |
| i.l_ = 2; // go on to find a selected bucket |
| } |
| else |
| { |
| // no more partitions, we are done, invalidate iterator |
| i.l_ = 999; |
| return FALSE; |
| } |
| // fall through to next case |
| |
| case 2: |
| |
| // bucket level |
| if (selectedSingleBucket_ >= 0) |
| { |
| if (i.b_ <= (CollIndex)selectedSingleBucket_) |
| // starting on this partition, try the one selected bucket |
| i.b_ = (CollIndex)selectedSingleBucket_; |
| else |
| // past the single selected bucket, select no more |
| i.b_ = (CollIndex)(i.l1PartStats_->getLastValidBucketIndx() + 1); |
| } |
| else |
| { |
| // go to next bucket that exists |
| while ((*i.l1PartStats_)[i.b_] == NULL && |
| i.b_ <= (CollIndex)i.l1PartStats_->getLastValidBucketIndx()) |
| i.b_++; |
| } |
| |
| i.l2BucketStats_ = (*i.l1PartStats_)[i.b_]; |
| if (i.l2BucketStats_ == NULL) |
| { |
| // no more buckets, advance at partition level |
| i.p_++; |
| i.l_ = 1; |
| break; |
| } |
| else |
| { |
| // found a new bucket, reset file level |
| i.l3FileStats_ = NULL; |
| i.f_ = 0; |
| i.l_ = 3; // go on to find a file |
| } |
| |
| // fall through to next case |
| |
| case 3: |
| |
| // file level |
| |
| if (i.f_ < i.l2BucketStats_->entries()) |
| { |
| // found a file |
| i.l3FileStats_ = (*i.l2BucketStats_)[i.f_]; |
| |
| // advance index to point to a future file |
| i.f_++; |
| |
| // found a new file |
| return TRUE; |
| } |
| else |
| { |
| // no more files, advance at bucket level |
| i.b_++; |
| i.l_ = 2; |
| } |
| break; |
| |
| default: |
| // iterator has reached the end |
| return FALSE; |
| } |
| } |
| } |
| |
| HbaseSearchKey::HbaseSearchKey(const ValueIdList & keyColumns, |
| const ValueIdList & orderOfKeyValues, |
| const ValueIdSet & externalInputs, |
| const NABoolean forwardSearch, |
| ValueIdSet & setOfPredicates, // the disjunct predicate |
| const ValueIdSet& nonKeyColumnSet, |
| const IndexDesc *indexDesc, |
| const IndexDesc *UDIndexDesc, |
| const ValueIdSet & outputsByHbaseScan) : |
| SearchKey(keyColumns, |
| orderOfKeyValues, |
| externalInputs, |
| forwardSearch, |
| setOfPredicates, |
| nonKeyColumnSet, |
| indexDesc, |
| UDIndexDesc), |
| keyColumns_(keyColumns), |
| nonKeyColumns_(nonKeyColumnSet) |
| { |
| // SearchKey() removes the key predicates from setOfPredicates. |
| // When we are here, setOfPredicates contain predicates not in |
| // in SearchKey. These predicates should be placed in executorPred |
| // in FileScan. Save such setOfPredicates. |
| |
| nonKeyPreds_ = setOfPredicates; |
| |
| requiredOutputColumns_ = keyColumns_; |
| requiredOutputColumns_.insert(nonKeyColumns_); |
| isFalsePred_ = FALSE; |
| } |
| |
| |
| void SearchKey::computeCoveredLeadingKeys() |
| { |
| coveredLeadingKeys_ = 0; |
| |
| // compute the # of leading key columns covered by the key columns |
| // in the disjunct. |
| |
| ValueIdSet columnsCoveredByKey; |
| |
| const ValueIdSet & keyPreds = getKeyPredicates() ; |
| ValueId vid = keyPreds.init(); |
| for (; keyPreds.next(vid); keyPreds.advance(vid)) { |
| vid.getItemExpr()->findAll(ITM_BASECOLUMN, columnsCoveredByKey, TRUE, TRUE); |
| } |
| |
| // If the key column is an INDEX COLUMN, convert it into a base column |
| |
| ValueIdList baseKeyCols; |
| |
| const ValueIdList& keyColumns = getKeyColumns(); |
| for (CollIndex i=0; i<keyColumns.entries(); i++ ) { |
| if ( keyColumns[i].getItemExpr()->getOperatorType() == ITM_INDEXCOLUMN ) |
| vid = ((IndexColumn*)(keyColumns[i].getItemExpr()))->getDefinition(); |
| else |
| vid = keyColumns[i]; |
| |
| baseKeyCols.insertAt(baseKeyCols.entries(), vid); |
| } |
| |
| coveredLeadingKeys_ = 0; |
| |
| for (CollIndex i=0; i<baseKeyCols.entries(); i++ ) { |
| if ( columnsCoveredByKey.containsTheGivenValue(baseKeyCols[i]) ) |
| coveredLeadingKeys_ ++; |
| else { |
| // do a search by column position |
| BaseColumn* keyCol = (BaseColumn*)(baseKeyCols[i].getItemExpr()); |
| |
| NABoolean found = FALSE; |
| |
| ValueId cvid = columnsCoveredByKey.init(); |
| for (; columnsCoveredByKey.next(cvid); columnsCoveredByKey.advance(cvid)) { |
| BaseColumn* colInKeyPred = (BaseColumn*)(cvid.getItemExpr()); |
| |
| if ( keyCol->getColNumber() == colInKeyPred->getColNumber() ) { |
| coveredLeadingKeys_++; |
| found = TRUE; |
| break; |
| } |
| } |
| |
| if ( !found ) |
| break; |
| } |
| } |
| } |
| |
| void SearchKey::computeFullKeyPredicates(ValueIdSet& predicates) |
| { |
| // If the key column is an INDEX COLUMN, convert it into a base column |
| ValueIdList baseKeyCols; |
| |
| ValueId vid; |
| const ValueIdList& keyColumns = getKeyColumns(); |
| for (CollIndex i=0; i<keyColumns.entries(); i++ ) { |
| if ( keyColumns[i].getItemExpr()->getOperatorType() == ITM_INDEXCOLUMN ) |
| vid = ((IndexColumn*)(keyColumns[i].getItemExpr()))->getDefinition(); |
| else |
| vid = keyColumns[i]; |
| |
| baseKeyCols.insertAt(baseKeyCols.entries(), vid); |
| } |
| |
| // compute the # of leading key columns covered by the key columns |
| // in the disjunct. |
| ValueIdSet columnsCoveredByKey; |
| |
| vid = predicates.init(); |
| for (; predicates.next(vid); predicates.advance(vid)) { |
| |
| columnsCoveredByKey.clear(); |
| vid.getItemExpr()->findAll(ITM_BASECOLUMN, columnsCoveredByKey, TRUE, TRUE); |
| |
| // for each vid (or predicate), check the base column list |
| for (CollIndex i=0; i<baseKeyCols.entries(); i++ ) { |
| |
| NABoolean found = FALSE; |
| |
| // the predicate directly refers the base column |
| if ( columnsCoveredByKey.containsTheGivenValue(baseKeyCols[i]) ) { |
| fullKeyPredicates_ += vid; |
| found = TRUE; |
| } else { |
| // do a search by column position |
| BaseColumn* keyCol = (BaseColumn*)(baseKeyCols[i].getItemExpr()); |
| |
| |
| ValueId cvid = columnsCoveredByKey.init(); |
| for (; columnsCoveredByKey.next(cvid); columnsCoveredByKey.advance(cvid)) { |
| BaseColumn* colInKeyPred = (BaseColumn*)(cvid.getItemExpr()); |
| |
| // the key column in the predicate is the same as the base column |
| if ( keyCol->getColNumber() == colInKeyPred->getColNumber() ) { |
| fullKeyPredicates_ += vid; |
| found = TRUE; |
| break; |
| } |
| } |
| } |
| |
| if ( found ) |
| break; |
| } |
| } |
| } |
| |
| // --------------------------------------------------------------------------- |
| // Take a set of predicates and make one or more HBase search keys from it. |
| // Parameters are the same as for HbaseSearchKey constructor, with the |
| // following differences: |
| // |
| // setOfPredicates: Returns executor predicates for all the generated |
| // search keys (eliminates only those predicates that |
| // are key preds for all returned search keys) |
| // producedKeys: Returns a list of search keys produced |
| // useBuiltinSearchKey: Indicates whether we should use the built-in |
| // search key or whether we can optimize and build |
| // one or more scan and single row instructions for |
| // HBase that use only constants |
| // |
| // This method tries to transform RangeSpecs on key columns into |
| // multiple HbaseSearchKeys if possible. |
| // This can be removed if/when we support MDAM for HBase scans. |
| // --------------------------------------------------------------------------- |
| void HbaseSearchKey::makeHBaseSearchKeys( |
| const SearchKey * builtinSearchKey, |
| const ValueIdList & keyColumns, |
| const ValueIdList & orderOfKeyValues, |
| const ValueIdSet & externalInputs, |
| const NABoolean forwardSearch, |
| ValueIdSet & setOfPredicates, |
| const ValueIdSet& nonKeyColumnSet, |
| const IndexDesc *indexDesc, |
| const ValueIdSet & outputsByHbaseScan, |
| LIST(HbaseSearchKey *) &producedKeys) |
| { |
| // Range Specs for key columns, ordered by key position, plus the |
| // normalized item expressions generated from the RangeSpecs |
| ARRAY(ItemExpr *) rangeSpecsForKeyCols(HEAP); |
| ARRAY(ItemExprList *) rangeBackbonesForKeyCols(HEAP); |
| ValueIdSet tempExternalInputs; // don't use non-consts as inputs |
| |
| producedKeys.clear(); |
| |
| ///////////////////////////////////////////////////////////////////////// |
| // Look at the case where we have some RangeSpecs for key columns. |
| // Those can't be used for a single subset scan or a regular SearchKey. |
| // Solution: Create multiple SearchKeys, one for each range in the RangeSpecs. |
| // Example, with key columns (a,b,c): |
| // WHERE (a=1 or a=3 or a>5) and b>0 and c in (2,4) |
| // This has a RangeSpecRef on columns a and c. |
| // It will generate 3*2=6 HbaseSearchKeys: |
| // a=1 and b>0 and c=2 |
| // a=1 and b>0 and c=4 |
| // a=3 and b>0 and c=2 |
| // a=3 and b>0 and c=4 |
| // a>5 and b>0 and c=2 |
| // a>5 and b>0 and c=4 |
| // |
| // Note that this will not always preserve the ordering of the data |
| // (we could enhance this later to detect some order-preserving cases) |
| ///////////////////////////////////////////////////////////////////////// |
| |
| // Step 1: Walk through exe preds and search for RangeSpecRefs on key columns |
| |
| for (ValueId e=setOfPredicates.init(); setOfPredicates.next(e); setOfPredicates.advance(e)) |
| if (e.getItemExpr()->getOperatorType() == ITM_RANGE_SPEC_FUNC) |
| { |
| RangeSpecRef* rangeIE = (RangeSpecRef *) e.getItemExpr(); |
| OptNormRangeSpec* destObj = rangeIE->getRangeObject(); |
| ItemExpr* rangeCol = destObj->getRangeExpr(); |
| |
| // check whether this RangeSpecRef is on one of the key columns |
| for (CollIndex k=0; k<indexDesc->getIndexKey().entries(); k++) |
| if (rangeCol->containsTheGivenValue(indexDesc->getIndexKey()[k])) |
| { |
| // found a RangeSpecRef on key column k |
| if (rangeBackbonesForKeyCols.used(k)) |
| { |
| // we have a conflict, multiple RangeSpecs for the same column |
| // (this should not be very common) |
| // Could merge the RangeSpecs or just pick one. Pick one for now. |
| break; |
| } |
| else |
| { |
| rangeSpecsForKeyCols.insert(k, e.getItemExpr()); |
| |
| // found a RangeSpecRef on key column i, make OR backbone into a list |
| ItemExprList *rangeBB = new(CmpCommon::statementHeap()) |
| ItemExprList(destObj->getRangeItemExpr(), |
| CmpCommon::statementHeap(), |
| ITM_OR, |
| FALSE, |
| FALSE); |
| rangeBackbonesForKeyCols.insert(k, rangeBB); |
| } |
| break; |
| } |
| } |
| |
| // skip steps 2-5 if there are no interesting RangeSpecs |
| if (rangeBackbonesForKeyCols.entries() > 0) |
| { |
| // Step 2: check how many key columns we should use to expand RangeSpecs |
| |
| // List of ValueIdSets, one for each HbaseSearchKey to be generated |
| LIST(ValueIdSet) predListForSearchKeys(STMTHEAP); |
| |
| // remember range specs and their associated individual preds |
| ValueIdSet rangeSpecs, rangePreds, allKeyPreds; |
| |
| // simplest case is a single HbaseSearchKey, if we find no RangeSpecs below |
| predListForSearchKeys.insert(setOfPredicates); |
| |
| Lng32 maxNumSearchKeys = getDefaultAsLong(HBASE_MAX_NUM_SEARCH_KEYS); |
| Lng32 lastKeyColToUse = -1; |
| Lng32 currNumSearchKeys = 1; |
| |
| for (CollIndex j=0; j<indexDesc->getIndexKey().entries(); j++) |
| { |
| if (rangeBackbonesForKeyCols.used(j)) |
| { |
| currNumSearchKeys *= rangeBackbonesForKeyCols[j]->entries(); |
| |
| if (currNumSearchKeys > maxNumSearchKeys) |
| { |
| // exceeded max # of search keys, |
| // stop at the column before this one |
| currNumSearchKeys /= rangeBackbonesForKeyCols[j]->entries(); |
| |
| break; |
| } |
| } |
| else |
| { |
| // if we don't have a range spec for this column, stop |
| // at any column that is not set to a constant |
| // If we generate more than one search key, their begin/end |
| // key values cannot contain anything than constants |
| ItemExpr *keyCol = indexDesc->getIndexKey()[j].getItemExpr(); |
| ValueId keyColVegRef; |
| |
| if (keyCol->getOperatorType() == ITM_INDEXCOLUMN) |
| { |
| // Go from IndexColumn to NAColumn to VEG and check for constant values |
| Lng32 baseColNum = |
| (static_cast<IndexColumn *>(keyCol))->getNAColumn()->getPosition(); |
| keyColVegRef = |
| indexDesc->getPrimaryTableDesc()->getColumnVEGList()[baseColNum]; |
| if ((static_cast<VEGReference *>(keyColVegRef.getItemExpr()))->getVEG()-> |
| getAConstant(TRUE) == NULL_VALUE_ID) |
| { |
| // Column does not seem to have a <col> = <const> predicate, stop |
| currNumSearchKeys = 0; |
| break; |
| } |
| } |
| else |
| break; // should not get here |
| } |
| // if we made it to this point, we can use a RangeSpec |
| // for this column to generate more HbaseSearchKeys |
| lastKeyColToUse = j; |
| } // check how many key columns to use |
| |
| |
| // Step 3: Starting with the trailing key column to use, walk over |
| // RangeSpecRefs if there are any, and multiply the number |
| // of existing HbaseSearch keys by the number of ranges |
| |
| for (Lng32 i=lastKeyColToUse; i>=0; i--) |
| if (rangeBackbonesForKeyCols.used(i)) |
| { |
| // temporary list to form cartesian product |
| LIST(ValueIdSet) tempListOfSearchKeys(predListForSearchKeys); |
| ItemExprList *ranges = rangeBackbonesForKeyCols[i]; |
| ValueId rangeSpecValId = rangeSpecsForKeyCols[i]->getValueId(); |
| |
| // clear the main list of search keys before re-populating it |
| predListForSearchKeys.clear(); |
| rangeSpecs += rangeSpecValId; |
| |
| // form a cartesian product between the ranges of the RangeSpecRef |
| // and the existing list of ValueIdSets for search keys |
| for (CollIndex r=0; r<ranges->entries(); r++) |
| { |
| for (CollIndex p=0; p<tempListOfSearchKeys.entries(); p++) |
| { |
| // take one existing set for a search key and remove e |
| // note that this set must contain e |
| ValueIdSet newPreds(tempListOfSearchKeys[p]); |
| // replace the RangeSpecRef with its range r |
| newPreds -= rangeSpecValId; |
| newPreds += (*ranges)[r]->getValueId(); |
| rangePreds += (*ranges)[r]->getValueId(); |
| CMPASSERT(!(newPreds == tempListOfSearchKeys[p])); |
| // newPreds += new range pred |
| predListForSearchKeys.insert(newPreds); |
| } // loop over list of preds for search keys |
| } // loop over ranges of RangeSpec |
| } // have a RangeSpec for key column i |
| |
| //CMPASSERT(predListForSearchKeys.entries() == currNumSearchKeys); |
| |
| // all original preds (including RangeSpecs) |
| ValueIdSet commonKeyPreds(setOfPredicates); |
| |
| // Step 4: convert the list of predicates for search keys |
| // into actual HbaseSearchKeys |
| |
| for (CollIndex s=0; s<predListForSearchKeys.entries(); s++) |
| { |
| // preds that go into SearchKey (RangeSpecs are replaced) |
| ValueIdSet keyPreds(predListForSearchKeys[s]); |
| |
| HbaseSearchKey *searchKeyPtr = new(CmpCommon::statementHeap()) |
| HbaseSearchKey(keyColumns, |
| orderOfKeyValues, |
| tempExternalInputs, |
| forwardSearch, |
| predListForSearchKeys[s], |
| nonKeyColumnSet, |
| builtinSearchKey ? builtinSearchKey->getIndexDesc() |
| : indexDesc, |
| builtinSearchKey ? builtinSearchKey->getUDIndexDesc() |
| : NULL, |
| outputsByHbaseScan); |
| if (searchKeyPtr->areAllKeysConstants(TRUE)) |
| { |
| // Make sure the new search key is not identical to |
| // any one already collected. Duplicated hbase search |
| // keys will lead to duplcated rows. |
| |
| NABoolean found = FALSE; |
| for (CollIndex k=0; k<producedKeys.entries(); k++) { |
| if ( producedKeys[k]->isEqualTo(searchKeyPtr) ) { |
| found = TRUE; |
| break; |
| } |
| } |
| |
| // now setup the hbase begin and end key |
| // Note that all begin/end key values have to be |
| // constant values if we want to produce more than |
| // the single search key computed above |
| |
| if ( !found ) |
| producedKeys.insert(searchKeyPtr); |
| |
| } |
| else |
| { |
| // This should be relatively uncommon, but this search |
| // key has some non-constants in it, even though the |
| // single search key didn't. Fall back to the single |
| // search key case. |
| producedKeys.clear(); |
| break; |
| } |
| |
| // determine key preds common to all search keys so far |
| keyPreds -= predListForSearchKeys[s]; |
| allKeyPreds += keyPreds; |
| commonKeyPreds.intersectSet(keyPreds); |
| } |
| |
| // Step 5: determine executor predicates to use: |
| // - predicates that are non-key predicates for |
| // any of the search keys |
| // - RangeSpecs, unless all of their ranges |
| // have been used as key predicates |
| if (producedKeys.entries() > 0) |
| { |
| setOfPredicates -= commonKeyPreds; |
| if (allKeyPreds.contains(rangePreds)) |
| setOfPredicates -= rangeSpecs; |
| } |
| } // found RangeSpecs |
| |
| if (producedKeys.isEmpty()) |
| { |
| // convert the built-in search key to an HbaseSearchKey |
| // (caller already verified that it only used constants) |
| HbaseSearchKey *searchKeyPtr = new(CmpCommon::statementHeap()) |
| HbaseSearchKey(keyColumns, |
| orderOfKeyValues, |
| tempExternalInputs, |
| forwardSearch, |
| setOfPredicates, |
| nonKeyColumnSet, |
| builtinSearchKey ? builtinSearchKey->getIndexDesc() |
| : indexDesc, |
| builtinSearchKey ? builtinSearchKey->getUDIndexDesc() |
| : NULL, |
| outputsByHbaseScan); |
| if (searchKeyPtr->areAllKeysConstants(TRUE)) |
| producedKeys.insert(searchKeyPtr); |
| } |
| } |
| |
| /* |
| NABoolean HbaseSearchKey::searchOnKeyColumnsOnly() |
| { |
| if ( nonKeyPreds_.entries() == 0 ) |
| return TRUE; |
| |
| // It has been observed that for Table T(a,b,c,d, primary key(a,b,c)) and DELETE query |
| // |
| // delete from t010t2 where a in (1,2,4) and b='a' and (c in (1,3) or c>=5), |
| // |
| // (HBASE.HBASE.T010T2.B = 'a' = HBASE.HBASE.T010T2.B) will be stored in nonKeyPreds_. |
| // |
| // As a quick fix, we examine each predicate in nonKeyPreds_. If all columns in the |
| // predicate are not the key columns, we will return FALSE. Otherwise, we continue |
| // the verification for next predicate. |
| // |
| |
| ValueIdSet baseColsForKey; |
| const ValueIdList& keyColumns = getKeyColumns(); |
| ValueId vid; |
| for (CollIndex i=0; i<keyColumns.entries(); i++ ) { |
| if ( keyColumns[i].getItemExpr()->getOperatorType() == ITM_INDEXCOLUMN ) |
| vid = ((IndexColumn*)(keyColumns[i].getItemExpr()))->getDefinition(); |
| else |
| vid = keyColumns[i]; |
| |
| baseColsForKey.insert(vid); |
| } |
| |
| ValueIdSet columnsCovered; |
| |
| vid = nonKeyPreds_.init(); |
| for (; nonKeyPreds_.next(vid); nonKeyPreds_.advance(vid)) { |
| |
| columnsCovered.clear(); |
| vid.getItemExpr()->findAll(ITM_BASECOLUMN, columnsCovered, TRUE, TRUE); |
| |
| ValueIdSet overlapSet = baseColsForKey.intersect(columnsCovered); |
| if ( overlapSet.entries() == 0 ) |
| return FALSE; |
| } |
| |
| return TRUE; |
| } |
| */ |
| |
| NABoolean |
| HbaseSearchKey::isEqualTo(const HbaseSearchKey* other) const |
| { |
| if ( isUnique() != other->isUnique() ) |
| return FALSE; |
| |
| if ( getCoveredLeadingKeys() != other->getCoveredLeadingKeys() ) |
| return FALSE; |
| |
| // assume #beginKeyValues == #endKeyValues |
| if ( getBeginKeyValues().entries() != other->getBeginKeyValues().entries() ) |
| return FALSE; |
| |
| if ( !areKeyValuesIdentical(other, getBeginKeyValues().entries()) ) |
| return FALSE; |
| |
| if ( isBeginKeyExclusive() != other->isBeginKeyExclusive() || |
| isEndKeyExclusive() != other->isEndKeyExclusive()) |
| return FALSE; |
| |
| return TRUE; |
| } |