| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| /* -*-C++-*- |
| ****************************************************************************** |
| * |
| * File: RelSample.cpp |
| * Description: All the methods of RelSample() and PhysSample(). |
| * |
| * Created: 9/24/98 |
| * Language: C++ |
| * |
| * |
| ****************************************************************************** |
| */ |
| #include "AllItemExpr.h" |
| #include "ItemSample.h" |
| #include "AllRelExpr.h" |
| #include "RelSample.h" |
| #include "SchemaDB.h" |
| #include "GroupAttr.h" |
| #include "BindWA.h" |
| #include "NormWA.h" |
| #include "Cost.h" |
| #include "CostMethod.h" |
| #include "opt.h" |
| #include "Globals.h" |
| #include "SqlParserAux.h" |
| |
| // ----------------------------------------------------------------------- |
| // This file contains all the methods for the class RelSample. |
| // This is a departure from the way other RelExpr's are organized. |
| // |
| |
| |
| /////////////////////////////////////////////////////////////////////////// |
| // |
| // RelSample Class |
| // |
| // |
| /////////////////////////////////////////////////////////////////////////// |
| |
| RelSample::RelSample(RelExpr *child, |
| SampleTypeEnum sampleType, |
| ItemExpr *balanceExpr, |
| ItemExpr *requiredOrder, |
| CollHeap *oHeap) |
| : RelExpr(REL_SAMPLE, child, NULL, oHeap), |
| balanceExprTree_(balanceExpr), |
| sampleType_(sampleType), |
| sampleScanSucceeded_(FALSE), |
| requiredOrderTree_(requiredOrder) |
| { |
| setNonCacheable(); |
| if (balanceExprTree_ != NULL) |
| { |
| ((ItmBalance *)balanceExprTree_)->propagateSampleType(sampleType); |
| ((ItmBalance *)balanceExprTree_)->rearrangeChildren(); |
| } |
| } |
| |
| RelSample::~RelSample() |
| { |
| } |
| |
| ItemExpr * |
| RelSample::removeRequiredOrderTree() |
| { |
| ItemExpr *requiredOrderTree = requiredOrderTree_; |
| |
| requiredOrderTree_ = NULL; |
| |
| return requiredOrderTree; |
| } |
| |
| |
| Float32 RelSample::getSamplePercent() const |
| { |
| // Call ONLY if: RANDOM or CLUSTER sampling and relative size |
| // and NOT stratified sampling |
| if (sampleType() != RANDOM && |
| sampleType() != CLUSTER) |
| return -1.0; |
| |
| ItmBalance * balExp = NULL; |
| |
| if (balanceExprTree_ != NULL) |
| balExp = (ItmBalance *)balanceExprTree_; |
| else |
| { |
| if (NOT balanceExpr().isEmpty()) |
| { |
| ValueId exprId; |
| balanceExpr().getFirst(exprId); |
| balExp = (ItmBalance *)(exprId.getItemExpr()); |
| } |
| } |
| |
| if (balExp != NULL) |
| { |
| if ((balExp->isAbsolute() == TRUE) OR |
| (balExp->getNextBalance() != NULL)) |
| return -1.0; |
| |
| double size = balExp->getSampleConstValue(); |
| size = size / 100; // Size specified as percent |
| return (float)size; |
| } |
| return -1.0; |
| } |
| |
| Lng32 RelSample::getClusterSize() const |
| { |
| // Call ONLY if: CLUSTER sampling |
| // and NOT stratified sampling |
| if (sampleType() != CLUSTER) |
| return -1; |
| |
| ItmBalance * balExp = NULL; |
| |
| if (balanceExprTree_ != NULL) |
| balExp = (ItmBalance *)balanceExprTree_; |
| else |
| { |
| if (NOT balanceExpr().isEmpty()) |
| { |
| ValueId exprId; |
| balanceExpr().getFirst(exprId); |
| balExp = (ItmBalance *)(exprId.getItemExpr()); |
| } |
| } |
| |
| if (balExp != NULL) |
| { |
| if (balExp->getNextBalance() != NULL) |
| return -1; |
| |
| double size = balExp->getClusterConstValue(); |
| return (Lng32)size; |
| } |
| return -1; |
| } |
| |
| NABoolean RelSample::isSimpleRandomRelative() const |
| { |
| // True only if: RANDOM sampling and relative size |
| // and NOT stratified sampling (except for random |
| // sample of a single stratum). |
| if (getSamplePercent() == -1.0) |
| return FALSE; |
| else |
| return TRUE; |
| } |
| |
| // RelSample::topHash() -------------------------------------------------- |
| // Compute a hash value for a chain of derived RelExpr nodes. |
| // Used by the Cascade engine as a quick way to determine if |
| // two nodes are identical. |
| // Can produce false positives (nodes appear to be identical), |
| // but should not produce false negatives (nodes are definitely different) |
| // |
| // Inputs: none (other than 'this') |
| // |
| // Outputs: A HashValue of this node and all nodes in the |
| // derivation chain below (towards the base class) this node. |
| // |
| HashValue RelSample::topHash() |
| { |
| // Compute a hash value of the derivation chain below this node. |
| // |
| HashValue result = RelExpr::topHash(); |
| |
| result ^= sampleType(); |
| result ^= balanceExpr(); |
| result ^= sampledColumns(); |
| |
| result ^= requiredOrder(); |
| |
| return result; |
| } |
| |
| // RelSample::duplicateMatch() |
| // A more thorough method to compare two RelExpr nodes. |
| // Used by the Cascades engine when the topHash() of two |
| // nodes returns the same hash values. |
| // |
| // Inputs: other - a reference to another node of the same type. |
| // |
| // Outputs: NABoolean - TRUE if this node is 'identical' to the |
| // 'other' node. FALSE otherwise. |
| // |
| // In order to match, this node must match all the way down the |
| // derivation chain to the RelExpr class. |
| // |
| NABoolean |
| RelSample::duplicateMatch(const RelExpr & other) const |
| { |
| // Compare this node with 'other' down the derivation chain. |
| // |
| if (!RelExpr::duplicateMatch(other)) |
| return FALSE; |
| |
| // Cast the RelExpr to a RelSample node. |
| // |
| RelSample &o = (RelSample &) other; |
| |
| // If the sampling type and sample size expressions are same then the |
| // nodes are identical |
| // |
| if (!(sampleType() == o.sampleType())) |
| return FALSE; |
| |
| if(!(balanceExpr() == o.balanceExpr())) |
| return FALSE; |
| |
| // If the required order keys are not the same |
| // then the nodes are not identical |
| // |
| if (!(requiredOrder() == o.requiredOrder())) |
| return FALSE; |
| |
| return TRUE; |
| } |
| |
| // RelSample::copyTopNode ---------------------------------------------- |
| // Copy a chain of derived nodes (Calls RelExpr::copyTopNode). |
| // Needs to copy all relevant fields. |
| // Used by the Cascades engine. |
| // |
| // Inputs: derivedNode - If Non-NULL this should point to a node |
| // which is derived from this node. If NULL, then this |
| // node is the top of the derivation chain and a node must |
| // be constructed. |
| // |
| // Outputs: RelExpr * - A Copy of this node. |
| // |
| // If the 'derivedNode is non-NULL, then this method is being called |
| // from a copyTopNode method on a class derived from this one. If it |
| // is NULL, then this is the top of the derivation chain and an UnPackRows |
| // node must be constructed. |
| // |
| // In either case, the relevant data members must be copied to 'derivedNode' |
| // and 'derivedNode' is passed to the copyTopNode method of the class |
| // below this one in the derivation chain (RelExpr::copyTopNode() in this |
| // case). |
| // |
| RelExpr * |
| RelSample::copyTopNode(RelExpr *derivedNode, CollHeap *outHeap) |
| { |
| RelSample *result; |
| |
| if (derivedNode == NULL) |
| // This is the top of the derivation chain |
| result = new (outHeap) RelSample(child(0), |
| sampleType(), |
| balanceExprTree() |
| ); |
| else |
| // A node has already been constructed as a derived class. |
| result = (RelSample *) derivedNode; |
| |
| // Copy the relavant fields. |
| |
| result->sampleType_ = sampleType(); |
| |
| result->sampleScanSucceeded_ = sampleScanSucceeded(); |
| |
| if (balanceExprTree() != NULL) |
| result->balanceExprTree() = balanceExprTree()->copyTree(outHeap)->castToItemExpr(); |
| |
| result->balanceExpr() = balanceExpr(); |
| |
| result->sampledColumns() = sampledColumns(); |
| |
| result->requiredOrder() = requiredOrder(); |
| |
| // Copy any data members from the classes lower in the derivation chain. |
| // |
| return RelExpr::copyTopNode(result, outHeap); |
| } |
| |
| |
| // RelSample::addLocalExpr() ----------------------------------------------- |
| // Insert into a list of expressions all the expressions of this node and |
| // all nodes below this node in the derivation chain. Insert into a list of |
| // names, all the names of the expressions of this node and all nodes below |
| // this node in the derivation chain. This method is used by the GUI tool |
| // and by the Explain Function to have a common method to get all the |
| // expressions associated with a node. |
| // |
| // Inputs/Outputs: xlist - a list of expressions. |
| // llist - a list of names of expressions. |
| // |
| // The xlist contains a list of all the expressions associated with this |
| // node. The llist contains the names of these expressions. (This lists |
| // must be kept in the same order). |
| // RelSample::addLocalExpr potentially adds the balance expression |
| // ("balance_expression"). |
| // |
| // It then calls RelExpr::addLocalExpr() which will add any RelExpr |
| // expressions to the list. |
| // |
| void RelSample::addLocalExpr(LIST(ExprNode *) &xlist, |
| LIST(NAString) &llist) const |
| { |
| if (sampledColumns().entries() > 0) |
| { |
| xlist.insert(sampledColumns().rebuildExprTree(ITM_ITEM_LIST)); |
| llist.insert("sampled_columns"); |
| } |
| |
| if (balanceExprTree() || balanceExpr().entries() > 0) |
| { |
| if (balanceExprTree_) |
| xlist.insert(balanceExprTree_); |
| else |
| xlist.insert(balanceExpr().rebuildExprTree(ITM_ITEM_LIST)); |
| llist.insert("balance_expression"); |
| } |
| |
| if (requiredOrderTree_) { |
| xlist.insert(requiredOrderTree_); |
| llist.insert("required_order"); |
| } else if (requiredOrder().entries() > 0) { |
| xlist.insert(requiredOrder().rebuildExprTree(ITM_ITEM_LIST)); |
| llist.insert("required_order"); |
| } |
| |
| RelExpr::addLocalExpr(xlist,llist); |
| } |
| |
| // RelSample::getPotentialOutputValues() --------------------------------- |
| // Construct a Set of the potential outputs of this node. |
| // |
| // Inputs: none (other than 'this') |
| // |
| // Outputs: outputValues - a ValueIdSet representing the potential outputs |
| // of this node. |
| // |
| // The potential outputs for the RelSample node are the new columns |
| // generated by the RelSample node. The new columns generated by RelSample |
| // node are "Sampled" versions of all the potential outputs of its child. |
| // |
| void |
| RelSample::getPotentialOutputValues(ValueIdSet & outputValues) const |
| { |
| outputValues += sampledColumns(); |
| } |
| |
| // RelSample::pushdownCoveredExpr() ------------------------------------ |
| // |
| // In order to compute the Group Attributes for a relational operator |
| // an analysis of all the scalar expressions associated with it is |
| // performed. The purpose of this analysis is to identify the sources |
| // of the values that each expression requires. As a result of this |
| // analysis values are categorized as external dataflow inputs or |
| // those that can be produced completely by a certain child of the |
| // relational operator. |
| // |
| // This method is invoked on each relational operator. It causes |
| // a) the pushdown of predicates and |
| // b) the recomputation of the Group Attributes of each child. |
| // The recomputation is required either because the child is |
| // assigned new predicates or is expected to compute some of the |
| // expressions that are required by its parent. |
| // |
| // For the sample operator, only the balance expression contains references |
| // to any outputs produced by the child. These expressions refer to the unsampled |
| // columns and therefore can be pushed down. However, the predicatesOnParent if |
| // they exist refer to the sampled columns and therefore they will not be pushed |
| // down. |
| // --------------------------------------------------------------------- |
| |
| void |
| RelSample::pushdownCoveredExpr(const ValueIdSet &outputExpr, |
| const ValueIdSet &newExternalInputs, |
| ValueIdSet &predicatesOnParent, |
| const ValueIdSet *setOfValuesReqdByParent, |
| Lng32 childIndex |
| ) |
| { |
| ValueIdSet exprOnParent; |
| if (setOfValuesReqdByParent) |
| exprOnParent = *setOfValuesReqdByParent; |
| exprOnParent += outputExpr; |
| ValueId refVal; |
| ValueIdSet outputSet; |
| |
| // Prune from the sampledColumns() ValueIdSet, those expressions |
| // that are not needed above (in setOfValuesReqdByParent) or by |
| // the selectionPred. |
| // |
| for(ValueId sampleCol = sampledColumns().init(); sampledColumns().next(sampleCol); |
| sampledColumns().advance(sampleCol)) { |
| if(!exprOnParent.referencesTheGivenValue(sampleCol, refVal) && |
| !selectionPred().referencesTheGivenValue(sampleCol, refVal)) { |
| sampledColumns() -= sampleCol; |
| } |
| } |
| |
| // Remove all expressions from exprOnParent. They |
| // can't be pushed down! |
| // |
| exprOnParent.clear(); |
| |
| // Add all the values required for the Sample expressions |
| // to the values required by the parent. These expression |
| // can't be pushed down either, but attempting to push them |
| // down causes the child node to provide the values needed. |
| // |
| outputSet += sampledColumns(); |
| |
| exprOnParent += balanceExpr(); |
| |
| exprOnParent.insertList(requiredOrder()); |
| |
| RelExpr::pushdownCoveredExpr(outputSet, |
| newExternalInputs, |
| predicatesOnParent, |
| &exprOnParent, |
| childIndex); |
| |
| } // RelSample::pushdownCoveredExpr |
| |
| Context* RelSample::createContextForAChild(Context* myContext, |
| PlanWorkSpace* pws, |
| Lng32& childIndex) |
| { |
| // --------------------------------------------------------------------- |
| // If one Context has been generated for each child, return NULL |
| // to signal completion. |
| // --------------------------------------------------------------------- |
| if (pws->getCountOfChildContexts() == getArity()) |
| return NULL; |
| |
| childIndex = 0; |
| |
| Lng32 planNumber = 0; |
| const ReqdPhysicalProperty* rppForMe = myContext->getReqdPhysicalProperty(); |
| PartitioningRequirement* partReqForMe = |
| rppForMe->getPartitioningRequirement(); |
| |
| // If a partitioning requirement exists and it requires broadcast |
| // replication, then return NULL now. Only an exchange operator |
| // can satisfy a broadcast replication partitioning requirement. |
| if ((partReqForMe != NULL) AND |
| partReqForMe->isRequirementReplicateViaBroadcast()) |
| return NULL; |
| |
| RequirementGenerator rg(child(0),rppForMe); |
| |
| // --------------------------------------------------------------------- |
| // Add the order requirements needed for this RelSample node |
| // --------------------------------------------------------------------- |
| |
| // Remove any sort order requirement from parent. |
| // |
| rg.removeSortKey(); |
| rg.removeArrangement(); |
| rg.removeSortOrderTypeReq(); |
| |
| // Shouldn't/Can't add a sort order type requirement |
| // if we are in DP2 |
| if (rppForMe->executeInDP2()) |
| rg.addSortKey(requiredOrder(),NO_SOT); |
| else |
| rg.addSortKey(requiredOrder(),ESP_SOT); |
| |
| // Can not execute absolute sampling in parallel |
| // |
| if(!isSimpleRandomRelative()) |
| rg.addNumOfPartitions(1); |
| |
| // --------------------------------------------------------------------- |
| // Done adding all the requirements together, now see whether it worked |
| // and give up if it is not possible to satisfy them |
| // --------------------------------------------------------------------- |
| if (NOT rg.checkFeasibility()) |
| return NULL; |
| |
| // --------------------------------------------------------------------- |
| // Compute the cost limit to be applied to the child. |
| // --------------------------------------------------------------------- |
| CostLimit* costLimit = computeCostLimit(myContext, pws); |
| |
| // --------------------------------------------------------------------- |
| // Get a Context for optimizing the child. |
| // Search for an existing Context in the CascadesGroup to which the |
| // child belongs that requires the same properties as those in |
| // rppForChild. Reuse it, if found. Otherwise, create a new Context |
| // that contains rppForChild as the required physical properties.. |
| // --------------------------------------------------------------------- |
| Context* result = shareContext(childIndex, |
| rg.produceRequirement(), |
| myContext->getInputPhysicalProperty(), |
| costLimit, |
| myContext, |
| myContext->getInputLogProp()); |
| |
| // --------------------------------------------------------------------- |
| // Store the Context for the child in the PlanWorkSpace. |
| // --------------------------------------------------------------------- |
| pws->storeChildContext(childIndex, planNumber, result); |
| |
| return result; |
| |
| } // RelSample::createContextForAChild() |
| |
| // RelSample::removeBalanceExprTree() ------------------------------------- |
| // Return the sizeExprTree_ ItemExpr tree and set to NULL, |
| // |
| // Inputs: none (Other than 'this') |
| // |
| // Outputs: ItemExpr * - the value of sizeExprTree_ |
| // |
| // Side Effects: Sets the value of sizeExprTree_ to NULL. |
| // |
| // Called by RelSample::bindNode(). The value of sizeExprTree_ is not |
| // needed after the binder. |
| // |
| ItemExpr * |
| RelSample::removeBalanceExprTree() |
| { |
| ItemExpr *result = balanceExprTree(); |
| balanceExprTree_ = (ItemExpr *)NULL; |
| return result; |
| } |
| |
| // RelSample::transformNode() ------------------------------------------- |
| // Unconditional query transformations such as the transformation of |
| // a subquery to a semijoin are implemented by the virtual function |
| // transformNode(). The aim of such transformations is to bring the |
| // query tree to a canonical form. transformNode() also ensures |
| // that the "required" (or characteristic) input values are "minimal" |
| // and the "required" (or characteristic) outputs values are |
| // "maximal" for each operator. |
| // |
| // transformNode() is an overloaded name, which is used for a set |
| // of methods that implement the transformation phase of query |
| // normalization. |
| // |
| // We use the term query tree for a tree of relational operators, |
| // each of which can contain none or more scalar expression trees. |
| // The transformations performed by transformNode() brings scalar |
| // expressions into a canonical form. The effect of most such |
| // transformations is local to the scalar expression tree. |
| // However, the transformation of a subquery requires a semijoin |
| // to be performed between the relational operator that contains |
| // the subquery and the query tree for the subquery. The effect |
| // of such a subquery transformation is therefore visible not |
| // only in the scalar expression tree but also in the relational |
| // expression tree. |
| // |
| // Parameters: |
| // |
| // NormWA & normWARef |
| // IN : a pointer to the normalizer work area |
| // |
| // ExprGroupId & locationOfPointerToMe |
| // IN : a reference to the location that contains a pointer to |
| // the RelExpr that is currently being processed. |
| // |
| void RelSample::transformNode(NormWA &normWARef, |
| ExprGroupId &locationOfPointerToMe) |
| { |
| CMPASSERT( this == locationOfPointerToMe ); |
| |
| // If this node has already been transformed, we are done. |
| // |
| if (nodeIsTransformed()) |
| return; |
| |
| // Make sure that it is only transformed once. |
| // |
| markAsTransformed(); |
| |
| //Sample node does not pull up the predicates and so the equality |
| //predicates on below this node are not true above this node, |
| //so create a new VEGRegion when transfroming the child |
| normWARef.allocateAndSetVEGRegion(IMPORT_AND_EXPORT,this); |
| |
| // transformNode takes up a bound tree and turns into a transformed |
| // tree. For a RelExpr that means the following. |
| // + expressions are transformed. If the expressions contain |
| // subqueries then new RelExpr are created for them and |
| // they are usually added above (as an ancestor) of the node |
| // that contained them. |
| // + predicates are pulled up from the children and their |
| // required inputs are modified |
| // + the required inputs of the node itself are changed from |
| // being a sufficient set to being a sufficient minimal set. |
| // |
| // Transform the child. |
| // Pull up their transformed predicates |
| // recompute their required inputs. |
| // |
| child(0)->transformNode(normWARef, child(0)); |
| |
| if(balanceExpr().transformNode(normWARef, |
| child(0), |
| getGroupAttr()->getCharacteristicInputs())) |
| { |
| // ----------------------------------------------------------------- |
| // Transform my new child |
| // ----------------------------------------------------------------- |
| child(0)->transformNode(normWARef, child(0)); |
| } |
| |
| if(requiredOrder(). |
| transformNode(normWARef, |
| child(0), |
| getGroupAttr()->getCharacteristicInputs())) { |
| |
| // The requiredOrder list apparently had some subqueries that had |
| // not been processed before (is this possible?). Normalize the |
| // new tree that has become our child. |
| // |
| child(0)->transformNode(normWARef, child(0)); |
| } |
| |
| normWARef.restoreOriginalVEGRegion(); |
| |
| // Pull up the predicates and recompute the required inputs |
| // of whoever my children are now. |
| // |
| pullUpPreds(); |
| |
| |
| // transform the selection predicates |
| // |
| transformSelectPred(normWARef, locationOfPointerToMe); |
| |
| } // RelSample::transformNode() |
| |
| // RelSample::rewriteNode() --------------------------------------------- |
| // rewriteNode() is the virtual function that computes |
| // the transitive closure for "=" predicates and rewrites value |
| // expressions. |
| // |
| // Parameters: |
| // |
| // NormWA & normWARef |
| // IN : a pointer to the normalizer work area |
| // |
| void RelSample::rewriteNode(NormWA & normWARef) |
| { |
| |
| // locate the VEGRegion that was created for this node during transformation |
| normWARef.locateAndSetVEGRegion(this); |
| |
| child(0)->rewriteNode(normWARef); |
| |
| balanceExpr().normalizeNode(normWARef); |
| |
| requiredOrder().normalizeNode(normWARef); |
| |
| normWARef.restoreOriginalVEGRegion(); |
| |
| selectionPred().normalizeNode(normWARef); |
| |
| // rewrite expression in the group attributes |
| getGroupAttr()->normalizeInputsAndOutputs(normWARef); |
| |
| } // RelSample::rewriteNode() |
| |
| |
| RelExpr * RelSample::normalizeNode(NormWA & normWARef) |
| { |
| if (nodeIsNormalized()) |
| return this; |
| markAsNormalized(); |
| |
| pushdownCoveredExpr(getGroupAttr()->getCharacteristicOutputs(), |
| getGroupAttr()->getCharacteristicInputs(), |
| selectionPred()); |
| |
| // locate the VEGRegion that was created for this node during transformation |
| normWARef.locateAndSetVEGRegion(this); |
| |
| child(0) = child(0)->normalizeNode(normWARef); |
| |
| normWARef.restoreOriginalVEGRegion(); |
| |
| fixEssentialCharacteristicOutputs(); |
| |
| return this; |
| } |
| |
| |
| // RelSample::pullUpPreds() -------------------------------------------- |
| // is redefined to disallow the pullup of predicates |
| // from the operator's child. The outputs of the sample operator |
| // are "sampled" versions of the outputs of its child. |
| // |
| void RelSample::pullUpPreds() |
| { |
| // --------------------------------------------------------------------- |
| // Simply don't pull up child's selection predicates. Still need to tell |
| // child to recompute its outer references due to the warning below. |
| // --------------------------------------------------------------------- |
| child(0)->recomputeOuterReferences(); |
| |
| // --------------------------------------------------------------------- |
| // WARNING: One rule that this procedure must follow is |
| // that recomputeOuterReferences() must be called on the children even |
| // if no predicates are pulled up from them. This is to correct |
| // the outer references that are added to a right child of a |
| // semi or outer join when processing subqueries in the ON clause. |
| // --------------------------------------------------------------------- |
| } |
| |
| // RelSample::recomputeOuterReferences() -------------------------------- |
| // This method is used by the normalizer for recomputing the |
| // outer references (external dataflow input values) that are |
| // still referenced by each operator in the subquery tree |
| // after the predicate pull up is complete. |
| // |
| // Side Effects: sets the characteristicInputs of the groupAttr. |
| // |
| void RelSample::recomputeOuterReferences() |
| { |
| // This is virtual method on RelExpr. |
| // When this is called it is assumed that the children have already |
| // been transformed. |
| // The required inputs of the child are therefore already minimal |
| // and sufficient. |
| // It is also assumed that the RelExpr itself has been bound. |
| // That implies that the group attributes have already been allocated |
| // and the required inputs is a sufficient (but not necessarilly minimum) |
| // set of external values needed to evaluate all expressions in this subtree. |
| // |
| // Delete all those input values that are no longer referenced on |
| // this operator because the predicates that reference them have |
| // been pulled up. |
| // |
| ValueIdSet outerRefs = getGroupAttr()->getCharacteristicInputs(); |
| |
| // The set of valueIds need by this node. |
| // |
| ValueIdSet allMyExpr(getSelectionPred()); |
| |
| allMyExpr += balanceExpr(); |
| |
| allMyExpr.insertList(requiredOrder()); |
| |
| |
| // Remove from outerRefs those valueIds that are not needed |
| // by all my expressions |
| // |
| allMyExpr.weedOutUnreferenced(outerRefs); |
| |
| // Add to outerRefs those that my children need. |
| // |
| outerRefs += child(0).getPtr()->getGroupAttr()->getCharacteristicInputs(); |
| |
| // set my Character Inputs to this new minimal set. |
| // |
| getGroupAttr()->setCharacteristicInputs(outerRefs); |
| } // RelSample::recomputeOuterReferences() |
| |
| |
| CostScalar RelSample::computeResultSize(const CostScalar &childCardinality) |
| { |
| |
| // CostScalar resultSize; |
| |
| // Compute the result size based on the size expression. |
| // For now, set it to the child cardinality |
| |
| CMPASSERT(balanceExpr().entries() == 1); |
| |
| ValueId balanceRoot; |
| |
| balanceExpr().getFirst(balanceRoot); |
| |
| ItmBalance *balanceExpr = (ItmBalance *)balanceRoot.getItemExpr(); |
| |
| CostScalar resultSize = balanceExpr->computeResultSize(childCardinality); |
| |
| return resultSize; |
| } |
| |
| |
| // RelSample::synthEstLogProp() ------------------------------------------ |
| // synthesize estimated logical properties given a specific set of |
| // input log. properties. |
| // |
| // Parameters: |
| // |
| // EstLogPropSharedPtr inputEstLogProp |
| // IN : A set of input logical properties used to estimate the logical |
| // properities of this node. |
| // |
| void RelSample::synthEstLogProp(const EstLogPropSharedPtr& inputEstLogProp) |
| { |
| if (getGroupAttr()->isPropSynthesized(inputEstLogProp) == TRUE) |
| return; |
| |
| // Get the estimated logical properties of the child. To be used |
| // to estimate the logical properties of this node. |
| // |
| EstLogPropSharedPtr childEstProp = child(0).outputLogProp(inputEstLogProp); |
| const ColStatDescList &childColStats = childEstProp->getColStats(); |
| |
| CostScalar rowCount = |
| computeResultSize(childEstProp->getResultCardinality()); |
| |
| for(CollIndex i = 0; i < childColStats.entries(); i++) { |
| ColStatDescSharedPtr columnStatDesc = childColStats[i]; |
| CostScalar oldCount = columnStatDesc->getColStats()->getRowcount(); |
| |
| if (oldCount != rowCount) |
| columnStatDesc->synchronizeStats(oldCount, rowCount); |
| } |
| |
| EstLogPropSharedPtr myEstProps = |
| synthEstLogPropForUnaryLeafOp(inputEstLogProp, |
| childColStats, |
| rowCount); |
| |
| // Set the logical properties of this node. |
| // |
| getGroupAttr()->addInputOutputLogProp(inputEstLogProp, myEstProps); |
| |
| } // RelSample::synthEstLogProp |
| |
| |
| // RelSample::synthLogProp ---------------------------------------------- |
| // synthesize logical properties |
| // |
| void |
| RelSample::synthLogProp(NormWA * normWAPtr) |
| { |
| // check to see whether properties are already synthesized. |
| if (getGroupAttr()->existsLogExprForSynthesis()) |
| return; |
| |
| RelExpr::synthLogProp(normWAPtr); |
| |
| ValueIdSet nonRIConstraints; |
| for (ValueId x= child(0).getGroupAttr()->getConstraints().init(); |
| child(0).getGroupAttr()->getConstraints().next(x); |
| child(0).getGroupAttr()->getConstraints().advance(x) ) |
| { |
| if ((x.getItemExpr()->getOperatorType() != ITM_COMP_REF_OPT_CONSTRAINT) && |
| (x.getItemExpr()->getOperatorType() != ITM_REF_OPT_CONSTRAINT)) |
| nonRIConstraints += x; |
| } |
| getGroupAttr()->addConstraints(nonRIConstraints); |
| getGroupAttr()->addSuitableRefOptConstraints |
| (child(0).getGroupAttr()->getConstraints()); |
| |
| } // RelSample::synthLogProp() |
| |
| |
| |
| // RelSample::bindNode - Bind the RelSample node. |
| // This node is generated by the parser when it encounters a SAMPLE |
| // clause. This node has two item expressions: |
| // |
| // sizeExprTree(): This expression contains either a simple size |
| // expression containing a size, type and normal/oversampling field |
| // or a tree of IfThenElse nodes each with a predicate on a column |
| // and a simple size expression followed by an optional else part. |
| // |
| // skipPeriodTree(): This is a simple size expression with no reference |
| // to any inputs or outputs of this node or its child. The reason to make |
| // this into an expression is only to reuse the SampleSize ItemExpr. |
| // However, this "expression" is really not an expression and hence need not |
| // treated as one (e.g., there is no need to bind, normalize, etc. Similarly |
| // no need to check for outer references, etc). |
| // |
| RelExpr *RelSample::bindNode(BindWA *bindWA) |
| { |
| |
| // If this node has already been bound, we are done. |
| // |
| if (nodeIsBound()) |
| return this; |
| |
| // Bind the child nodes. |
| // |
| bindChildren(bindWA); |
| if (bindWA->errStatus()) |
| return this; |
| |
| |
| // If this is a random sample on an HBase table, push the sampling down into |
| // the Scan node and remove the Sample node from the tree. For HBase, we |
| // perform sampling via a row filter on the HBase side. |
| // |
| // Avoid pushdown for oversampling (sampling rate > 100%); the HBase filter |
| // we use can not return >1 copy of a row. |
| // |
| // For very low sampling rates, a significant amount of time could be spent in |
| // HBase before returning anything to Trafodion, with the risk of getting a |
| // scanner timeout exception. This is addressed on the HBase side by reducing |
| // the scan's cache size (the term they use for a set of rows combined into a |
| // single return). In extreme cases, the expected interval between returns may |
| // still be too great even when the cache size is set to the minimum prescribed |
| // by the HBASE_NUM_CACHE_ROWS_MIN cqd. For these cases, we divide the sampling |
| // between HBase and Trafodion, doing as much as possible in HBase without risking |
| // timeout. |
| // |
| Float32 trafSampleRate = getSamplePercent(); |
| RelExpr* myChild = child(0); |
| if (myChild->getOperatorType() == REL_SCAN && |
| (static_cast<Scan*>(myChild))->isHbaseTable() && |
| isSimpleRandomRelative() && |
| trafSampleRate <= 1.0f) |
| { |
| ULng32 returnInterval = |
| ActiveSchemaDB()->getDefaults().getAsULong(USTAT_HBASE_SAMPLE_RETURN_INTERVAL); |
| Lng32 cacheMin = CmpCommon::getDefaultNumeric(HBASE_NUM_CACHE_ROWS_MIN); |
| if (trafSampleRate < cacheMin / (Float32)returnInterval) |
| { |
| Float32 hbaseSampleRate = cacheMin / (Float32)returnInterval; |
| trafSampleRate /= hbaseSampleRate; |
| |
| // The parser function literalOfNumericWithScale() is used to get the |
| // correct form of the ConstValue (a fixed numeric) required for the |
| // ITM_BALANCE sample percentage operand. That function expects a heap- |
| // allocated NAString (which it deletes) containing the percentage in |
| // text form. |
| static const int BUF_SIZE = 20; |
| char buf[BUF_SIZE]; |
| snprintf(buf, BUF_SIZE, "%f", trafSampleRate * 100); // Express as a percentage |
| NAString* percentStrPtr = new(STMTHEAP) NAString(buf, STMTHEAP); |
| ExprNode* oldConst = balanceExprTree_->getChild(1); |
| balanceExprTree_->setChild(1, literalOfNumericWithScale(percentStrPtr, '+')); |
| delete oldConst; |
| (static_cast<Scan*>(myChild))->samplePercent(hbaseSampleRate); |
| } |
| else |
| { |
| (static_cast<Scan*>(myChild))->samplePercent(trafSampleRate); |
| return myChild; |
| } |
| } |
| |
| ItemExpr *requiredOrderTree = removeRequiredOrderTree(); |
| |
| if(requiredOrderTree) { |
| bindWA->getCurrentScope()->context()->inOrderBy() = TRUE; |
| requiredOrderTree->convertToValueIdList(requiredOrder(), |
| bindWA, |
| ITM_ITEM_LIST); |
| bindWA->getCurrentScope()->context()->inOrderBy() = FALSE; |
| if(bindWA->errStatus()) |
| return this; |
| } |
| |
| // Bind the balanceExprTree. This expression may contain a tree |
| // of Balance expressions. |
| // |
| ItemExpr *boundBalanceExpr = removeBalanceExprTree()->bindNode(bindWA); |
| |
| if (bindWA->errStatus()) |
| return this; |
| |
| if (boundBalanceExpr != NULL) { |
| balanceExpr().insert(boundBalanceExpr->getValueId()); |
| |
| if(((ItmBalance *)boundBalanceExpr)->checkErrors()) { |
| bindWA->setErrStatus(); |
| return this; |
| } |
| } |
| |
| // Generate the selection predicate from the balance expression only |
| // if there is a balance expression tree and there is no else clause. |
| // |
| NABoolean hasReturnTrue = FALSE; |
| ItmBalance * nextBalanceNode = (ItmBalance *)boundBalanceExpr; |
| |
| while ((nextBalanceNode != NULL) AND (hasReturnTrue == FALSE)) |
| { |
| if (nextBalanceNode->getPredicate()->getOperatorType() == ITM_RETURN_TRUE) |
| hasReturnTrue = TRUE; |
| nextBalanceNode = (ItmBalance *)nextBalanceNode->getNextBalance(); |
| } |
| |
| if (hasReturnTrue == FALSE) |
| { |
| nextBalanceNode = (ItmBalance *)boundBalanceExpr; |
| ItemExpr *pred; |
| ItemExpr *orexpr = NULL; |
| |
| while (nextBalanceNode != NULL) |
| { |
| pred = nextBalanceNode->getPredicate(); |
| |
| if (orexpr != NULL) |
| orexpr = new (CmpCommon::statementHeap()) BiLogic(ITM_OR, orexpr, pred); |
| else |
| orexpr = pred; |
| |
| nextBalanceNode = (ItmBalance *)nextBalanceNode->getNextBalance(); |
| } |
| // Now synthesize type |
| if (orexpr) |
| orexpr->synthTypeAndValueId(TRUE); // redrive Type Synthesis |
| |
| addSelPredTree(orexpr); |
| } |
| |
| // Construct the RETDesc for this node. |
| // |
| RETDesc *resultTable = new(bindWA->wHeap()) RETDesc(bindWA); |
| |
| // Add the columns from the child to the RETDesc. |
| // |
| const RETDesc &childTable = *child(0)->getRETDesc(); |
| |
| const ColumnDescList *sysColList = childTable.getSystemColumnList(); |
| |
| CollIndex i = 0; |
| for(i = 0; i < sysColList->entries(); i++) |
| { |
| ValueId columnValueId = sysColList->at(i)->getValueId(); |
| ItemExpr *newColumn = new (bindWA->wHeap()) |
| NotCovered (columnValueId.getItemExpr()); |
| newColumn->synthTypeAndValueId(); |
| |
| resultTable->addColumn(bindWA, |
| sysColList->at(i)->getColRefNameObj(), |
| newColumn->getValueId(), |
| SYSTEM_COLUMN, |
| sysColList->at(i)->getHeading()); |
| sampledColumns() += newColumn->getValueId(); |
| } |
| |
| for(i = 0; i < childTable.getDegree(); i++) |
| { |
| ValueId columnValueId = childTable.getValueId(i); |
| ItemExpr *newColumn = new (bindWA->wHeap()) |
| NotCovered (columnValueId.getItemExpr()); |
| newColumn->synthTypeAndValueId(); |
| |
| resultTable->addColumn(bindWA, |
| childTable.getColRefNameObj(i), |
| newColumn->getValueId(), |
| USER_COLUMN, |
| childTable.getHeading(i)); |
| sampledColumns() += newColumn->getValueId(); |
| } |
| |
| // Set the return descriptor |
| // |
| setRETDesc(resultTable); |
| bindWA->getCurrentScope()->setRETDesc(resultTable); |
| |
| // |
| // Bind the base class. |
| // |
| return bindSelf(bindWA); |
| } // RelSample::bindNode() |
| |
| // ----------------------------------------------------------------------- |
| // RelSample::semanticQueryOptimizeNode() |
| // This instance of the SQO virtual method is the same as the base class |
| // implementation except that it also keeps track of which |
| // VEGRegion we are currently in. |
| // ----------------------------------------------------------------------- |
| RelExpr * RelSample::semanticQueryOptimizeNode(NormWA & normWARef) |
| { |
| if (nodeIsSemanticQueryOptimized()) |
| return this; |
| markAsSemanticQueryOptimized() ; |
| |
| normWARef.locateAndSetVEGRegion(this); |
| // --------------------------------------------------------------------- |
| // UnNest the child. |
| // --------------------------------------------------------------------- |
| child(0) = child(0)->semanticQueryOptimizeNode(normWARef); |
| |
| normWARef.restoreOriginalVEGRegion(); |
| |
| return this; |
| |
| } // RelSample::semanticQueryOptimizeNode() |
| |
| |
| ///////////////////////////////////////////////////////////////////////////////////// |
| // |
| // Methods of the PhysSample |
| // |
| ///////////////////////////////////////////////////////////////////////////////////// |
| |
| PhysSample::~PhysSample() |
| { |
| }; |
| |
| |
| RelExpr * |
| PhysSample::copyTopNode(RelExpr *derivedNode, CollHeap *oHeap) |
| { |
| PhysSample *result; |
| |
| if (derivedNode == NULL) |
| result = new (oHeap) PhysSample(); |
| else |
| result = (PhysSample *)derivedNode; |
| |
| return RelSample::copyTopNode(result, oHeap); |
| |
| } |
| |
| |
| // PhysSample::costMethod() |
| // Obtain a pointer to a CostMethod object providing access |
| // to the cost estimation functions for nodes of this type. |
| CostMethod* |
| PhysSample::costMethod() const |
| { |
| static THREAD_P CostMethodSample *m = NULL; |
| if (m == NULL) |
| m = new (GetCliGlobals()->exCollHeap()) CostMethodSample(); |
| return m; |
| } // PhysSample::costMethod() |
| |
| |
| ValueIdList |
| RelSample::mapSortKey(const ValueIdList &sortKey) const |
| { |
| |
| ValueIdMap sampColsMap; |
| |
| for(ValueId sampleCol = sampledColumns().init(); |
| sampledColumns().next(sampleCol); |
| sampledColumns().advance(sampleCol)) { |
| |
| CMPASSERT(sampleCol.getItemExpr()->getOperatorType() == ITM_NOTCOVERED); |
| |
| sampColsMap.addMapEntry(sampleCol, |
| sampleCol.getItemExpr()->child(0)->getValueId()); |
| } |
| |
| ValueIdList newSortKey; |
| |
| sampColsMap.mapValueIdListUp(newSortKey, sortKey); |
| |
| return newSortKey; |
| } |
| |
| PhysicalProperty * |
| PhysSample::synthPhysicalProperty(const Context *context, |
| const Lng32 pn, |
| PlanWorkSpace *pws) |
| { |
| |
| const PhysicalProperty * const sppOfChild = |
| context->getPhysicalPropertyOfSolutionForChild(0); |
| |
| // for now, simply propagate the physical property |
| PhysicalProperty *samplePP = new(CmpCommon::statementHeap()) |
| PhysicalProperty(*sppOfChild, |
| mapSortKey(sppOfChild->getSortKey()), |
| sppOfChild->getSortOrderType(), |
| sppOfChild->getDp2SortOrderPartFunc()); |
| |
| return samplePP; |
| } // PhysSample::synthPhysicalProperty() |
| |