| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| |
| #include "ExpPCodeOptimizations.h" |
| |
| // |
| // Calculate the selectivity of this predicate group by taking both hit-ratio |
| // and cost into account. The higher the hit-ratio (and lower the cost) results |
| // in a higher selectivity (i.e. a better chance for this predicate group to |
| // be scheduled earlier |
| // |
| float PCodePredicateGroup::getSelectivity(NABoolean isStatic) |
| { |
| // If static, taken is 1 so that only cost is recognized. |
| float taken = (isStatic) ? ((float)1) : ((float)takenCount_); |
| float hitRatio = (((float)taken) / ((float)seenCount_)); |
| float efficiency = hitRatio / ((float)cost_); |
| |
| return efficiency; |
| } |
| |
| // |
| // The major selectivity function is based on hit-ratio. A predicate group |
| // that is seen just once, and taken, would result in a 100% hit-ratio, which |
| // would make it a better candidate than say a predicate group that is taken |
| // 99 times out of the 100 which it is seen. With cost being equal, it seems |
| // unfair to penalize a predicate group just because it's hit-ratio isn't the |
| // best. |
| // |
| // This minor selectivity function is used to give credit to those predicate |
| // groups with high taken counts - with all else being equal, a predicate |
| // group with a higher taken count can be considered a "reliable" or |
| // "guaranteed" choice. |
| // |
| float PCodePredicateGroup::getMinorSelectivity(NABoolean isStatic) |
| { |
| // If static, taken is 1 so that only cost is recognized. |
| float taken = (isStatic) ? ((float)1) : ((float)takenCount_); |
| float efficiency = taken / ((float)cost_); |
| |
| return efficiency; |
| } |
| |
| void PCodePredicateGroup::destroy(GROUPLIST& allGroups) |
| { |
| #if 0 |
| for (CollIndex i=0; i < allGroups.entries(); i++) |
| NADELETE(allGroups[i], PCodePredicateGroup, heap_); |
| #endif |
| } |
| |
| // |
| // Constructor for PCodePredicateGroup. The "isStatic" parameter is passed in |
| // as TRUE when static reordering is performed, and FALSE when dynamic |
| // reordering is done during runtime optimizations. |
| // |
| PCodePredicateGroup::PCodePredicateGroup(CollHeap* heap, |
| PCodeBlock* member, |
| NABoolean isStatic, |
| PCodeCfg* cfg) |
| : heap_(heap), predicates_(heap), reads_(heap), writes_(heap) |
| { |
| PCodeInst* lastInst = member->getLastInst(); |
| |
| // Add member to predicate group list |
| predicates_.insert(member); |
| |
| // Convert logical branches to logical counted branches |
| if (lastInst->isLogicalBranch()) { |
| // A predicate group that is a logical branch must be compile-time only |
| assert(isStatic); |
| |
| // Get appropriate opcode for logical counted branch |
| PCIT::Instruction opc = ((lastInst->getOpcode() == PCIT::BRANCH_AND) |
| ? PCIT::BRANCH_AND_CNT : PCIT::BRANCH_OR_CNT); |
| |
| // Add logical counted branch at end and delete old logical branch |
| PCodeInst* branch = member->insertNewInstAfter(lastInst, opc); |
| |
| // Rewrite pcode operands for this inst |
| branch->code[2] = 0; // clear trigger count |
| branch->code[3] = lastInst->code[3]; |
| branch->code[4] = lastInst->code[4]; |
| branch->code[5] = lastInst->code[5]; |
| branch->code[6] = lastInst->code[6]; |
| branch->code[7] = 0; // clear seen count |
| branch->code[8] = 0; // clear taken count |
| |
| // Reload operands |
| branch->reloadOperands(cfg); |
| |
| // Delete old logical branch |
| member->deleteInst(lastInst); |
| } |
| |
| // Set the counts for this basic predicate group |
| seenCount_ = (isStatic) ? 1 : member->getLastInst()->code[7]; |
| takenCount_ = (isStatic) ? 0 : member->getLastInst()->code[8]; |
| |
| // Set the cost_ for this basic predicate group. |
| cost_ = 0; |
| FOREACH_INST_IN_BLOCK(member, inst) { |
| cost_ += inst->getCost(); |
| } ENDFE_INST_IN_BLOCK |
| |
| // Always make cost_ a multiple of the second costliest PCODE instruction. |
| // At minimum its cost should be 1. |
| //cost_ = (cost_ < 100) ? 1 : (cost_/100 * 100); |
| } |
| |
| #if defined(_DEBUG) |
| |
| void PCodePredicateGroup::print() |
| { |
| CollIndex i; |
| |
| // Print all predicate blocks in this group |
| printf("("); |
| for (i=0; i < predicates_.entries(); i++) |
| printf(" %d ", predicates_[i]->getBlockNum()); |
| |
| // Print counts and costs for this predicate group |
| printf(") Cost=%d, Seen=%d, Taken=%d ", |
| (Int32)cost_, (Int32)seenCount_, (Int32)takenCount_); |
| |
| // Print reads |
| printf(", Reads="); |
| for (i=0; i < reads_.entries(); i++) |
| printf("%d ", reads_[i]->getBvIndex()); |
| |
| // Print writes |
| printf(", Writes="); |
| for (i=0; i < writes_.entries(); i++) |
| printf("%d ", writes_[i]->getBvIndex()); |
| |
| printf("\n"); |
| } |
| |
| #endif // defined(_DEBUG) |
| |
| // |
| // Gets invoked BEFORE predicate groups within unit have been shuffled. This |
| // should not be called for the static solution |
| // |
| void PCodePredicateGroup::adjustCost(GROUPLIST& unit) |
| { |
| CollIndex i; |
| PCodePredicateGroup* first = unit[0]; |
| PCodePredicateGroup* last = unit[unit.entries() - 1]; |
| |
| // Taken counts can be determined for any given predicate group by comparing |
| // its seen count with that of it's successor - the difference indicates how |
| // many times control exited the unit (early-exit). This method can't be |
| // done for the last predicate group in the unit, however. |
| |
| for (i=0; i < unit.entries()-1; i++) |
| unit[i]->setTakenCount(unit[i]->getSeenCount() - unit[i+1]->getSeenCount()); |
| |
| // Counts need to be flipped when the last member falls through to the target |
| // of the other members in the unit. |
| |
| // The taken count for the last predicate group represents the number of times |
| // control exited-early *within* that group. But now that this group is |
| // part of a unit, the taken count may need to be flipped since early-exit |
| // means something else in this case. And since both a BRANCH_AND_CNT or a |
| // BRANCH_OR_CNT may be used in the last predicate block to target the same |
| // exit block, special-care must be taken. |
| // |
| // If the last predicate group in the unit has a taken count associated with |
| // the same early-exit as that of the first predicate group, then no need to |
| // adjust the counts since they accurately reflect the early-exit. If not, |
| // however, then the counts needs to be flipped. |
| // |
| if (first->getLastPredBlock()->getTargetBlock() != |
| last->getFirstPredBlock()->getTargetBlock()) |
| { |
| // The fallthrough of this last predicate group is in fact the early exit |
| // block, so flip counts. |
| last->setTakenCount(last->getSeenCount() - last->getTakenCount()); |
| } |
| } |
| |
| // |
| // Gets invoked AFTER predicate groups within unit have been shuffled based |
| // on cost |
| // |
| void PCodePredicateGroup::setCost(GROUPLIST& unit, NABoolean isStatic) |
| { |
| CollIndex i; |
| Int64 totalSeen = 0; |
| Int64 totalTaken = 0; |
| Int64 totalCost = 0; |
| |
| // First determine how many times this unit was executed and set seenCount_ |
| // for the unit accordingly. Because any predicate group can be the head of |
| // this unit after swapping is done, the group with the highest seenCount_ |
| // must have the correct seenCount_ for the unit. |
| |
| for (i=1; i < unit.entries(); i++) { |
| if (seenCount_ < unit[i]->getSeenCount()) |
| seenCount_ = unit[i]->getSeenCount(); |
| } |
| |
| // Just to avoid potential divide-by-zero |
| if (seenCount_ == 0) |
| return; |
| |
| // Recalculate seen and taken counts based on order of predicate groups in |
| // unit. |
| for (totalSeen = seenCount_, i=0; i < unit.entries(); i++) { |
| // Old counts for this unit |
| Int64 seen = unit[i]->getSeenCount(); |
| Int64 taken = unit[i]->getTakenCount(); |
| |
| // Increment taken count for this unit |
| totalTaken += taken; |
| |
| // Calculate hit ratio |
| double hitRatio = (seen == 0) ? 0 : ((float)taken / (float)seen); |
| |
| // New predicated taken counts for this unit |
| taken = (Int64)(hitRatio * (float)totalSeen); |
| |
| // Increment cost for this unit |
| totalCost += (unit[i]->getCost() * totalSeen); |
| |
| // Next child will see however many this child saw minus early exits |
| totalSeen -= taken; |
| } |
| |
| takenCount_ = totalTaken; |
| |
| // If static, assume on average that half the blocks are visited in this |
| // unit before an early exit. As such, divide the total cost seen by 2. If |
| // dynamic, the weighted average is taken by refactoring in the seen count. |
| if (isStatic) |
| cost_ = (Int64)((float)totalCost / (float)2); |
| else |
| cost_ = (Int64)((float)totalCost / (float)seenCount_); |
| |
| // Always make cost_ a multiple of the second costliest PCODE instruction. |
| // At minimum its cost should be 1. |
| //cost_ = (cost_ < 100) ? 1 : (cost_/100 * 100); |
| } |
| |
| // |
| // Swap this predicate group with the "child" or "down" predicate group passed |
| // in. Edges to and from each predicate block in each of the predicate groups |
| // are rewired so that one replaces the other in the control flow graph |
| // |
| void PCodePredicateGroup::swap(PCodePredicateGroup* down, NABoolean adjacent) |
| { |
| CollIndex i; |
| Int32 tempOpc; |
| |
| BLOCKLIST origPredsDown(heap_); |
| BLOCKLIST origPredsUp(heap_); |
| BLOCKLIST origSuccsDown(heap_); |
| BLOCKLIST origSuccsUp(heap_); |
| |
| // |
| // Copying one NALIST into another has proven to be difficult. The "=" |
| // operator defined in the base class NACollection is for some reason not |
| // found at compile-time. And the copy constructor itself doesn't end up |
| // copying the entries in the list. So the solution used here is to first |
| // create the list and then to directly each entry into the new list. |
| // |
| |
| // Copy down's preds into origPredsDown |
| for (i=0; i < down->getFirstPredBlock()->getPreds().entries(); i++) |
| origPredsDown.insert(down->getFirstPredBlock()->getPreds()[i]); |
| |
| // Copy up's preds into origPredsUp |
| for (i=0; i < getFirstPredBlock()->getPreds().entries(); i++) |
| origPredsUp.insert(getFirstPredBlock()->getPreds()[i]); |
| |
| // Copy down's succs into origSuccsDown |
| for (i=0; i < down->getLastPredBlock()->getSuccs().entries(); i++) |
| origSuccsDown.insert(down->getLastPredBlock()->getSuccs()[i]); |
| |
| // Copy up's succs into origSuccsUp |
| for (i=0; i < getLastPredBlock()->getSuccs().entries(); i++) |
| origSuccsUp.insert(getLastPredBlock()->getSuccs()[i]); |
| |
| |
| // Record the first and last block of the up and down groups |
| |
| PCodeBlock* origFirstBlockDown = down->getFirstPredBlock(); |
| PCodeBlock* origFirstBlockUp = getFirstPredBlock(); |
| PCodeBlock* origLastBlockDown = down->getLastPredBlock(); |
| PCodeBlock* origLastBlockUp = getLastPredBlock(); |
| |
| // |
| // Fix up pred edges to new top group |
| // |
| |
| for (i=0; i < origPredsUp.entries(); i++) { |
| |
| // Add edge from preds to down, properly adding fall-through edges |
| if (origPredsUp[i]->getFallThroughBlock() == origFirstBlockUp) |
| origPredsUp[i]->addFallThroughEdge(origFirstBlockDown); |
| else |
| origPredsUp[i]->addEdge(origFirstBlockDown); |
| |
| // Remove the old pred edges to up |
| origPredsUp[i]->removeEdge(origFirstBlockUp); |
| } |
| |
| // |
| // Fix up pred edges to new down group |
| // |
| |
| for (i=0; !adjacent && (i < origPredsDown.entries()); i++) { |
| // Add edge from preds to down, properly adding fall-through edges |
| if (origPredsDown[i]->getFallThroughBlock() == origFirstBlockDown) |
| origPredsDown[i]->addFallThroughEdge(origFirstBlockUp); |
| else |
| origPredsDown[i]->addEdge(origFirstBlockUp); |
| |
| // Remove the old pred edges to down |
| origPredsDown[i]->removeEdge(origFirstBlockDown); |
| } |
| |
| // |
| // Fix interior nodes of new top group (i.e not last node) |
| // |
| |
| for (i=0; i < down->getPredicateBlocks().entries()-1; i++) { |
| PCodeBlock* block = down->getPredicateBlocks()[i]; |
| |
| // If this block points to some other interior node, then keep it - it |
| // doesn't need to be fixed up. |
| if ((block->getTargetBlock() != origLastBlockDown->getTargetBlock()) && |
| (block->getTargetBlock() != origLastBlockDown->getFallThroughBlock())) |
| continue; |
| |
| // First remove edge to target block |
| block->removeEdge(block->getTargetBlock()); |
| |
| // Replace with edge to new target block, which will either be the beginning |
| // of the old up group or the target of the old up group. |
| if (block->getLastInst()->getOpcode() == |
| origLastBlockUp->getLastInst()->getOpcode()) |
| block->addEdge(origLastBlockUp->getTargetBlock()); |
| else { |
| if (adjacent) |
| block->addEdge(origFirstBlockUp); |
| else |
| block->addEdge(origLastBlockUp->getFallThroughBlock()); |
| } |
| } |
| |
| // |
| // Fix interior nodes of new down group (i.e. not last node) |
| // |
| |
| for (i=0; i < getPredicateBlocks().entries()-1; i++) { |
| PCodeBlock* block = getPredicateBlocks()[i]; |
| |
| // If this block points to some other interior node, then keep it - it |
| // doesn't need to be fixed up. |
| if ((block->getTargetBlock() != origLastBlockUp->getTargetBlock()) && |
| (block->getTargetBlock() != origLastBlockUp->getFallThroughBlock())) |
| continue; |
| |
| // First remove edge to target block |
| block->removeEdge(block->getTargetBlock()); |
| |
| // Replace with edge to new target block, which will either be the beginning |
| // of the old up group or the target of the old up group. |
| if (block->getLastInst()->getOpcode() == |
| origLastBlockDown->getLastInst()->getOpcode()) |
| block->addEdge(origLastBlockDown->getTargetBlock()); |
| else |
| block->addEdge(origLastBlockDown->getFallThroughBlock()); |
| } |
| |
| // Swap opcodes |
| // TODO: we may need to swap result operands as well (qat/qatdml13) |
| #if 0 |
| select count(*) |
| from btsel01 |
| where |
| (binary_signed > 1000 |
| AND pic_decimal_1 + pic_decimal_2 * 2 <= 8) |
| OR pic_x_7 NOT LIKE ?p4 |
| OR pic_decimal_3 <> 8 |
| AND pic_decimal_3 >= 6; |
| #endif |
| |
| tempOpc = origLastBlockUp->getLastInst()->getOpcode(); |
| origLastBlockUp->getLastInst()->code[0] = |
| origLastBlockDown->getLastInst()->getOpcode(); |
| origLastBlockDown->getLastInst()->code[0] = tempOpc; |
| |
| |
| // |
| // Remove down's succs and up's succs and then fix them up |
| // |
| |
| while (origLastBlockDown->getSuccs().entries()) |
| origLastBlockDown->removeEdge(origLastBlockDown->getSuccs()[0]); |
| |
| while (origLastBlockUp->getSuccs().entries()) |
| origLastBlockUp->removeEdge(origLastBlockUp->getSuccs()[0]); |
| |
| if (origSuccsUp[0] == origFirstBlockDown) |
| origLastBlockDown->addEdge(origFirstBlockUp); |
| else |
| origLastBlockDown->addEdge(origSuccsUp[0]); |
| |
| origLastBlockDown->addEdge(origSuccsUp[1]); |
| origLastBlockUp->addEdge(origSuccsDown[0]); |
| origLastBlockUp->addEdge(origSuccsDown[1]); |
| |
| // |
| // Fix up entryBlock |
| // |
| |
| if (origFirstBlockUp->isEntryBlock()) |
| origFirstBlockDown->setToEntryBlock(); |
| } |
| |
| // |
| // Determine if the predicate group in the unit at index "head" can be swapped |
| // with that at index "tail". The two can't be swapped if, in between them, |
| // is a predicate group with a "no cross" stamp |
| // |
| NABoolean PCodePredicateGroup::containsXBlock(GROUPLIST& unit, |
| CollIndex head, |
| CollIndex tail) |
| { |
| CollIndex i, j; |
| PCodeOperand *read, *write; |
| PCodePredicateGroup* top = unit[head]; |
| PCodePredicateGroup* bottom = unit[tail]; |
| |
| // WAW violations - no violation if write are the same in all groups |
| for (i=tail-1; (Int32)i >= (Int32)head; i--) |
| { |
| for (j=0; j < bottom->getWrites().entries(); j++) { |
| write = bottom->getWrites()[j]; |
| if (!(unit[i]->getWrites().contains(write))) |
| return TRUE; |
| } |
| |
| for (j=0; j < unit[i]->getWrites().entries(); j++) { |
| write = unit[i]->getWrites()[j]; |
| if (!(bottom->getWrites().contains(write))) |
| return TRUE; |
| } |
| } |
| |
| // RAW violation |
| for (i=0; i < bottom->getReads().entries(); i++) { |
| read = bottom->getReads()[i]; |
| // Check if any of bottom's read operands conflict with write operands |
| for (j=tail-1; (Int32)j >= (Int32)head; j--) |
| if (unit[j]->getWrites().contains(read)) |
| return TRUE; |
| } |
| |
| // WAR violations |
| for (i=0; i < bottom->getWrites().entries(); i++) { |
| write = bottom->getWrites()[i]; |
| // Check if any of bottom's write operands conflict with read operands |
| for (j=tail-1; (Int32)j >= (Int32)head; j--) |
| if (unit[j]->getReads().contains(write)) |
| return TRUE; |
| } |
| |
| // RAW violation (starting with top now) |
| for (i=0; i < top->getReads().entries(); i++) { |
| read = top->getReads()[i]; |
| // Check if any of bottom's read operands conflict with write operands |
| for (j=head+1; j <= tail; j++) |
| if (unit[j]->getWrites().contains(read)) |
| return TRUE; |
| } |
| |
| // WAR violations (starting with top now) |
| for (i=0; i < top->getWrites().entries(); i++) { |
| write = top->getWrites()[i]; |
| // Check if any of top's write operands conflict with read operands |
| for (j=head+1; j <= tail; j++) |
| if (unit[j]->getReads().contains(write)) |
| return TRUE; |
| } |
| |
| return FALSE; |
| } |
| |
| // |
| // Identify groups which are (or aren't) PIC (position-independent-code) blocks. |
| // Blocks are not PIC if either 1) they defined a write operand which gets read |
| // in a different predicate block or 2) they read an operand which has no def in |
| // it's block. In either case, the reads and writes list of the block are |
| // updated. |
| // |
| void PCodePredicateGroup::identifyPICGroups(PCodeCfg* cfg, |
| CollHeap* heap, |
| GROUPLIST& allGroups) |
| { |
| CollIndex i, j, k; |
| PCodeBlock* block = getPredicateBlocks()[0]; |
| |
| NABitVector operands(heap); |
| |
| PCodeBlock* entryBlock = cfg->getEntryBlock(); |
| |
| FOREACH_INST_IN_BLOCK_BACKWARDS(block, inst) { |
| // Kill defs first |
| for (i=0; i < inst->getWOps().entries(); i++) |
| operands -= inst->getWOps()[i]->getBvIndex(); |
| |
| // Now add uses - note, consts don't count since they are constant defs |
| for (i=0; i < inst->getROps().entries(); i++) |
| if (!inst->getROps()[i]->isConst()) |
| operands += inst->getROps()[i]->getBvIndex(); |
| } ENDFE_INST_IN_BLOCK_BACKWARDS |
| |
| i = operands.getLastStaleBit(); |
| for (; (i = operands.prevUsed(i)) != NULL_COLL_INDEX; i--) |
| { |
| PCodeOperand* read = cfg->getMap()->getFirstValue(&i); |
| |
| PCodeInst* defE = entryBlock->reachingDefsTab_->find(i, TRUE /* In */); |
| PCodeInst* defB = block->reachingDefsTab_->find(i, TRUE /* In */); |
| |
| // If the def comes from entry, continue |
| if ((defE != NULL) && (defE == defB)) |
| continue; |
| |
| // Add read to reads list |
| addRead(read); |
| |
| // If the def comes from a predicate group, then continue to consider this |
| // as a predicate block, but mark the predicate block containing the def |
| // as one which swapping can't cross over - i.e. no cross group. |
| for (j=0; j < allGroups.entries(); j++) { |
| PCodeBlock* basicPB = allGroups[j]->getPredicateBlocks()[0]; |
| |
| if (basicPB->reachingDefsTab_->find(i) == NULL) |
| continue; |
| |
| FOREACH_INST_IN_BLOCK_BACKWARDS(basicPB, def) { |
| for (k=0; k < def->getWOps().entries(); k++) |
| if (def->getWOps()[k]->getBvIndex() == i) { |
| allGroups[j]->addWrite(read); |
| } |
| } ENDFE_INST_IN_BLOCK_BACKWARDS |
| } |
| } |
| } |
| |
| // |
| // Determine if this basic block can be treated as a basic predicate group (or |
| // a predicate block). Basic predicate groups must be self-containing such |
| // that they are position-independent and can be moved freely. |
| // |
| NABoolean PCodePredicateGroup::isPredicateBlock(PCodeBlock* block) |
| { |
| FOREACH_INST_IN_BLOCK_BACKWARDS(block, inst) { |
| // Some clauses should not be allowed to be swapped because they have |
| // side-effects. |
| if (inst->clause_ && |
| (inst->clause_->getClassID() == ex_clause::FUNC_RAISE_ERROR_ID)) |
| return FALSE; |
| } ENDFE_INST_IN_BLOCK_BACKWARDS |
| |
| return TRUE; |
| } |
| |
| // |
| // Returns TRUE if this predicate group "dominates" the sibling predicate group |
| // passed in. It does so if and only iff all edges flowing into the sibling |
| // predicate group come from this predicate group. |
| // |
| NABoolean PCodePredicateGroup::dominates(PCodePredicateGroup* sibling) |
| { |
| CollIndex i; |
| PCodeBlock* siblingFirstBlock = sibling->getFirstPredBlock(); |
| |
| // Since the first predicate block of the sibling predicate group dominates |
| // all the predicate block members in its group, we only need to check the |
| // edges flowing into the sibling's first pred block to see if this predicate |
| // group dominates sibling. |
| for (i=0; i < siblingFirstBlock->getPreds().entries(); i++) |
| { |
| PCodeBlock* pred = siblingFirstBlock->getPreds()[i]; |
| |
| // If this group does not contain the predicate block, then it can't |
| // possibly dominate sibling |
| if (predicates_.index(pred) == NULL_COLL_INDEX) |
| return FALSE; |
| } |
| |
| // If sibling has no preds, it can't be dominated |
| if (i == 0) |
| return FALSE; |
| |
| return TRUE; |
| } |
| |
| // |
| // Recursive algorithm to determine the unit head by this predicate group. In |
| // the process of this algorithm, other sub-units may be discovered and thereby |
| // processed, in order to correctly discover this unit. |
| // |
| // A "unit" list is passed in to record all predicate groups for this unit. The |
| // workList of predicate groups to search for inclusion is also passed in. If |
| // a swap occurred at any point during this algorithm, this knowledge is passed |
| // back to the caller. And lastly, the flag "isStatic" is passed in for use |
| // in the algorithm |
| // |
| // Return TRUE if any unit is found |
| // |
| NABoolean PCodePredicateGroup::findUnits(GROUPLIST& unit, |
| GROUPLIST& workList, |
| NABoolean& swapPerformed, |
| NABoolean isStatic) |
| { |
| CollIndex i, j, k; |
| PCodePredicateGroup *sibling, *group = this; |
| NABoolean unitFound = FALSE; |
| |
| #if defined(_DEBUG) |
| NABoolean debug = FALSE; |
| #endif // defined(_DEBUG) |
| |
| // The group is part of the unit, so add to unit and remove from workList |
| unit.insert(group); |
| workList.remove(group); |
| |
| // Find siblings |
| for (i=0; i < workList.entries(); i++) |
| { |
| // Record potential sibling |
| sibling = workList[i]; |
| |
| // 1. This group must dominate the potential sibling group |
| if (!group->dominates(sibling)) |
| continue; |
| |
| // 2. Target of this group must either be the fall-through/target block of |
| // the potential sibling group, and that even depends on the branch opc |
| // A change in opc implies that the target of the older sibling should |
| // be the fall-through of the younger sibling, if the two are to be in |
| // the same predicate group. |
| |
| PCodeBlock* target = group->getLastPredBlock()->getTargetBlock(); |
| PCodeBlock* siblingLastBlock = sibling->getLastPredBlock(); |
| Int32 opc = group->getLastPredBlock()->getLastInst()->getOpcode(); |
| |
| // If older and younger sibling have different opcodes... |
| if (opc != siblingLastBlock->getLastInst()->getOpcode()) |
| { |
| // The two siblings are in the same predicate group if the target block |
| // of the older sibling is the same as the fall-through block of the |
| // younger sibling. If they're not the same, this implies that the |
| // younger sibling may in fact be part of a sub-unit, and that unit needs |
| // to be materialized first before we can move forward. |
| |
| if (target != siblingLastBlock->getFallThroughBlock()) { |
| |
| // Attempt to find a sub-unit for this younger sibling |
| GROUPLIST tempUnit(heap_); |
| if (sibling->findUnits(tempUnit, workList, swapPerformed, isStatic)) |
| unitFound = TRUE; |
| |
| // Check if condition (2) is now met. If not, restart search for |
| // younger sibling. |
| if (opc != sibling->getLastPredBlock()->getLastInst()->getOpcode()) { |
| if (sibling->getLastPredBlock()->getFallThroughBlock() != target) { |
| i = (CollIndex)-1; |
| continue; |
| } |
| } |
| else { |
| if (sibling->getLastPredBlock()->getTargetBlock() != target) { |
| i = (CollIndex)-1; |
| continue; |
| } |
| } |
| |
| // All conditions met, so insert younger sibling into predicate group |
| unit.insert(tempUnit[0]); |
| workList.remove(tempUnit[0]); |
| } |
| else { |
| // Insert younger sibling into predicate group |
| unit.insert(sibling); |
| workList.remove(sibling); |
| } |
| } |
| else |
| { |
| // This algorithm is the same as the logic above in the then clause - it |
| // just treats things differently because of the opcode check. |
| if (target != siblingLastBlock->getTargetBlock()) { |
| GROUPLIST tempUnit(heap_); |
| if (sibling->findUnits(tempUnit, workList, swapPerformed, isStatic)) |
| unitFound = TRUE; |
| |
| if (opc != sibling->getLastPredBlock()->getLastInst()->getOpcode()) { |
| if (sibling->getLastPredBlock()->getFallThroughBlock() != target) { |
| i = (CollIndex)-1; |
| continue; |
| } |
| } |
| else { |
| if (sibling->getLastPredBlock()->getTargetBlock() != target) { |
| i = (CollIndex)-1; |
| continue; |
| } |
| } |
| |
| unit.insert(tempUnit[0]); |
| workList.remove(tempUnit[0]); |
| } |
| else { |
| unit.insert(sibling); |
| workList.remove(sibling); |
| } |
| } |
| |
| // Now group becomes what the sibling was |
| group = sibling; |
| |
| // Restart search from the beginning now |
| i=(CollIndex)-1; |
| } |
| |
| #if defined(_DEBUG) |
| // Just some extra checking to make sure we've exhausted all possible |
| // siblings. TODO: remove this once testing shows it to be stable. |
| sibling = unit[0]; |
| for (i=0; i < workList.entries(); i++) { |
| group = workList[i]; |
| |
| if (group->dominates(sibling)) |
| assert(FALSE); |
| } |
| #endif // defined(_DEBUG) |
| |
| // If unit isn't big enough, continue |
| if (unit.entries() < 2) |
| return unitFound; |
| |
| // Mark flag that a unit was found and so this expression should be analyzed |
| // at runtime to see if counts affect how predicates within this expression |
| // should be arranged. |
| unitFound = TRUE; |
| |
| // At this point, group represents the leader of the unit. Fix up the |
| // costs of the last block within this unit, should it need fixing. |
| if (!isStatic) |
| group->adjustCost(unit); |
| |
| #if defined(_DEBUG) |
| if (debug) { |
| printf("Unit found:\n"); |
| for (i=0; i < unit.entries(); i++) |
| unit[i]->print(); |
| printf("\n"); |
| } |
| #endif // defined(_DEBUG) |
| |
| // Record the fact on whether or not this unit contains a predicate group |
| // with a write operand. |
| NABoolean writeFound = FALSE; |
| for (j=0; j < unit.entries(); j++) { |
| if (unit[j]->getWrites().entries()) { |
| writeFound = TRUE; |
| break; |
| } |
| } |
| |
| // Swap elements in unit based on cost |
| for (j=0; j < unit.entries(); j++) { |
| // Make the assumption that the first predicate group member in this unit |
| // has the best selectivity, and therefore is appropriately placed |
| CollIndex minIndex = j; |
| |
| // Go through each predicate group member and check if that member has a |
| // selectivity higher than the one identified so far. |
| for (k=j+1; k < unit.entries(); k++) { |
| |
| float kSel = unit[k]->getSelectivity(isStatic); |
| float minSel = unit[minIndex]->getSelectivity(isStatic); |
| float diff = kSel - minSel; |
| |
| // Any difference in selectivity results is a reason for swapping |
| if (diff > 0.0) { |
| // Now for the secondary criteria where the hit-ratio isn't factored |
| // in - the taken/cost ratio should be considerable so that we don't |
| // let go of a good thing. |
| |
| kSel = unit[k]->getMinorSelectivity(isStatic); |
| minSel = unit[minIndex]->getMinorSelectivity(isStatic); |
| diff = (minSel == 0) ? kSel : (kSel/minSel); |
| |
| if (diff >= 0.25) { |
| // One more check. Do not allow the swap to occur if either groups |
| // violate any read/write dependencies. |
| if (!writeFound || !unit[j]->containsXBlock(unit, j, k)) |
| minIndex = k; |
| } |
| } |
| } |
| |
| // Identify the minGroup - i.e. the group that should be scheduled earliest |
| PCodePredicateGroup* minGroup = unit[minIndex]; |
| |
| if (minIndex != j) { |
| unit[j]->swap(minGroup, (j+1 == minIndex) /* are groups adjacent */ ); |
| |
| // Move minimal cost group to position j, so as to not process it again |
| unit[minIndex] = unit[j]; |
| unit[j] = minGroup; |
| |
| // Record the fact that a swap was made, changing the pcode graph |
| swapPerformed = TRUE; |
| } |
| } |
| |
| // Have "group" point to the beginning of the unit, and then reset its cost |
| // and counters based on the unit containing it. |
| group = unit[0]; |
| group->setCost(unit, isStatic); |
| |
| // Propagate knowledge about reads and writes for this unit. |
| for (i=1; i < unit.entries(); i++) { |
| for (j=0; j < unit[i]->getReads().entries(); j++) |
| group->getReads().insert(unit[i]->getReads()[j]); |
| for (j=0; j < unit[i]->getWrites().entries(); j++) |
| group->getWrites().insert(unit[i]->getWrites()[j]); |
| } |
| |
| #if defined(_DEBUG) |
| if (debug) { |
| printf("Unit after swap:\n"); |
| for (i=0; i < unit.entries(); i++) |
| unit[i]->print(); |
| printf("\n"); |
| } |
| #endif // defined(_DEBUG) |
| |
| // Merge the rest of the unit into the leader predicate group |
| while (unit.entries() > 1) { |
| group->getPredicateBlocks().insert(unit[1]->getPredicateBlocks()); |
| unit.remove(unit[1]); |
| } |
| |
| // Reinsert the merged group into the worklist |
| workList.insertAt(0, group); |
| |
| return unitFound; |
| } |
| |
| NABoolean PCodeCfg::reorderPredicates(NABoolean isStatic) |
| { |
| CollIndex i; |
| GROUPLIST allGroups(heap_); |
| Int32 triggerCount; |
| |
| NABoolean swapPerformed = FALSE; |
| NABoolean unitFound = FALSE; |
| PCodeBlock* start = entryBlock_; |
| |
| // Currently only run this optimization if we're dealing with a SCAN |
| if (expr_->getType() != ex_expr::exp_SCAN_PRED) |
| return swapPerformed; |
| |
| // If we have a bulk null branch, we only want to optimize in the case when |
| // we have no nulls to process. This will make it less likely to find false |
| // predicate blocks. |
| if (start->getLastInst()->isBulkNullBranch()) |
| start = start->getTargetBlock(); |
| |
| // Find all basic predicate groups. It's very important that we process |
| // blocks in the reverse depth-first-order so that calls to "findUnit" come |
| // from predicate blocks which aren't dominated by any other predicate block. |
| // This will ensure faster convergence for the algorithm. |
| FOREACH_BLOCK_REV_DFO_AT(start, block, firstInst, lastInst, index) |
| { |
| // Only blocks terminated by a logical branch qualify (potentially) as |
| // a predicate block. If terminated by a logical counted branch, no |
| // additional check is needed since that implies that the block already |
| // meets the requirements. |
| |
| if (lastInst && |
| (lastInst->isLogicalBranch() || lastInst->isLogicalCountedBranch()) && |
| PCodePredicateGroup::isPredicateBlock(block)) |
| { |
| PCodePredicateGroup* group = |
| (PCodePredicateGroup*) new(heap_) |
| PCodePredicateGroup(heap_, block, isStatic, this); |
| |
| allGroups.insert(group); |
| } |
| } ENDFE_BLOCK_REV_DFO |
| |
| // Don't proceed if we don't have at least 2 predicate groups to swap. |
| // Similarly, don't proceed if we have *too* many groups. |
| if ((allGroups.entries() < 2) || (allGroups.entries() > 1000)) { |
| if (allGroups.entries()) |
| allGroups[0]->destroy(allGroups); |
| return FALSE; |
| } |
| |
| // Now that we have the possibility of having a unit, identify read operands |
| // that reach each predicate group, and write operands which are live at the |
| // end of each predicate group. |
| for (i=0; i < allGroups.entries(); i++) |
| allGroups[i]->identifyPICGroups(this, heap_, allGroups); |
| |
| // Create a workList so as to maintain allGroups for later iteration |
| GROUPLIST workList(heap_); |
| for (i=0; i < allGroups.entries(); i++) |
| workList.insert(allGroups[i]); |
| |
| // Find all units and reorder them individually (and together) based on cost |
| // and frequency. |
| while (workList.entries()) { |
| GROUPLIST tempUnit(heap_); |
| if (workList[0]->findUnits(tempUnit, workList, swapPerformed, isStatic)) |
| unitFound = TRUE; |
| } |
| |
| // If no unit was found, then don't set trigger count for predicates in this |
| // expr since no predicates were identified and thus none can be swapped |
| // And since trigger count at compile-time is 0, we just need to return. |
| // We also can leave trigger count as 0 if dynamic predicate reordering is |
| // disabled |
| if (!unitFound || (isStatic && !(optFlags_ & DYNAMIC_REORDER_PREDICATES))) { |
| allGroups[0]->destroy(allGroups); |
| return swapPerformed; |
| } |
| |
| // Initialize trigger count for dynamic predicate reordering, and then reset |
| // it based on whether or not a swap was performed |
| triggerCount = allGroups[0]->getPredicateBlocks()[0]->getLastInst()->code[2]; |
| |
| // Scale trigger count down if it reached a point that's too high, and force |
| // counters to be cleansed by assuming a swap was done. |
| if (triggerCount > 0x3FFFFFFF) { |
| triggerCount = 0xFFFFF; |
| swapPerformed = TRUE; |
| } |
| |
| // Reset trigger count - init if static, double if dynamic |
| triggerCount = (isStatic) ? TRIGGER_COUNT : (triggerCount + triggerCount); |
| |
| // Reset counters and trigger count here |
| for (i=0; i < allGroups.entries(); i++) { |
| PCodePredicateGroup* group = allGroups[i]; |
| PCodeBlock* block = group->getPredicateBlocks()[0]; |
| PCodeInst* lastInst = block->getLastInst(); |
| |
| // Set trigger count |
| lastInst->code[2] = triggerCount; |
| |
| // Clear out counters if a swap was performed |
| if (swapPerformed) |
| lastInst->clearCounters(); |
| } |
| |
| allGroups[0]->destroy(allGroups); |
| return swapPerformed; |
| } |