| /********************************************************************** |
| // @@@ START COPYRIGHT @@@ |
| // |
| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // |
| // @@@ END COPYRIGHT @@@ |
| **********************************************************************/ |
| |
| #include "ExpPCodeOptimizations.h" |
| |
| #define HASH_ALL TRUE |
| #define REORDER_HASH TRUE |
| |
| // |
| // This is a recursive function that attempts to find a hash inst whose write |
| // operand is equivalent to the provided def operand. Begin the search at the |
| // provided start inst. |
| // |
| PCodeInst* PCodeCfg::findHashInst(PCodeOperand* def, PCodeInst* start) |
| { |
| CollIndex defIndex = def->getBvIndex(); |
| PCodeBlock* block; |
| |
| if (start == NULL) |
| return NULL; |
| |
| block = start->block; |
| |
| // Walk each inst backwards from start in search for an inst that defines |
| // "def". If that inst is not a hash or a bulk hash combined inst, then |
| // return NULL. |
| |
| FOREACH_INST_IN_BLOCK_BACKWARDS_AT(block, inst, start) { |
| for (CollIndex i=0; i < inst->getWOps().entries(); i++) { |
| if (defIndex == inst->getWOps()[i]->getBvIndex()) { |
| if (inst->isHash() || |
| inst->getOpcode() == PCIT::HASHCOMB_BULK_MBIN32U) |
| return inst; |
| |
| return NULL; |
| } |
| } |
| } ENDFE_INST_IN_BLOCK_BACKWARDS_AT |
| |
| // At this point, we could not find the instruction which defined def. Now |
| // if the definition reaches this block, then all paths to this block must |
| // have the definition of this def operand. So, only check the first pred |
| // via a recursive call. |
| |
| if (block->reachingDefsTab_->find(defIndex, TRUE /* In */)) |
| if (block->getPreds().entries() > 0) |
| return findHashInst(def, block->getPreds()[0]->getLastInst()); |
| |
| // Definition could not be found - return NULL. |
| return NULL; |
| } |
| |
| // |
| // Create bulk hash combined instructions from multiple hash and hashcomb |
| // instructions. |
| // |
| void PCodeCfg::bulkHashComb() |
| { |
| CollIndex i, j, length, idx; |
| |
| FOREACH_BLOCK(block, firstInst, lastInst, index) { |
| FOREACH_INST_IN_BLOCK(block, inst) { |
| if (inst->getOpcode() == PCIT::HASHCOMB_MBIN32U_MBIN32U_MBIN32U) |
| { |
| PCodeInst* newInst; |
| |
| // Find the hash instruction which defines the source operands of this |
| // hashcomb instruction. |
| PCodeInst* src1 = findHashInst(inst->getROps()[0], inst); |
| PCodeInst* src2 = findHashInst(inst->getROps()[1], inst); |
| |
| // Unless HASH_ALL is set, continue if either src1 or src2 is NULL. |
| // HASH_ALL is used when the definition found does not need to be |
| // hashed again - this is often the case when no hash instruction |
| // exists (e.g., OPDATA/CLAUSE_EVAL is used). |
| |
| if (!HASH_ALL && (!src1 || !src2)) |
| continue; |
| |
| NABoolean topOpOnlySrc1 = FALSE, topOpOnlySrc2 = FALSE; |
| CollIndex max = MAX_HASHCOMB_BULK_OPERANDS; |
| |
| // Set length taken by operands (index,offset,size,pos = 4 ints / oper) |
| // If src1 is null (i.e. no proper HASH found) or the number of ops |
| // is greater than the max allowed, just use the final result for the |
| // new HASHCOMB_BULK instruction. Otherwise read in all operands. |
| |
| if (src1 && (src1->getROps().entries() < max)) |
| length = src1->getROps().entries(); |
| else { |
| topOpOnlySrc1 = TRUE; |
| length = 1; |
| } |
| |
| if (src2 && ((length+src2->getROps().entries()) <= max)) |
| length += src2->getROps().entries(); |
| else { |
| topOpOnlySrc2 = TRUE; |
| length += 1; |
| } |
| |
| // Add length of write operand, length field, and opcode itself |
| length = 4*(length+1); |
| |
| // Set up pcode byte stream with what we know |
| newInst = createInst(PCIT::HASHCOMB_BULK_MBIN32U, length); |
| newInst->code[1] = length; |
| newInst->code[2] = inst->code[1]; |
| newInst->code[3] = inst->code[2]; |
| |
| // Create one list with all the source operands |
| OPLIST list(heap_); |
| |
| // If the source inst found is not null (i.e. HASH or HASHCOMB_BULK) |
| // then include its operands. Otherwise, we can't hash the source, but |
| // we can still combine it, while hashing everything else. This |
| // requires a small hack of resetting the size of the operand to 0. |
| // A zero length tells the hashing algoritm to return the value as is, |
| // unhashed. |
| |
| if (!topOpOnlySrc1) |
| list.insert(src1->getROps()); |
| else { |
| inst->getROps()[0]->setLen(0); |
| list.insert(inst->getROps()[0]); |
| } |
| |
| if (!topOpOnlySrc2) |
| list.insert(src2->getROps()); |
| else { |
| inst->getROps()[1]->setLen(0); |
| list.insert(inst->getROps()[1]); |
| } |
| |
| for (i=0; i < list.entries(); i++) { |
| newInst->code[(4*i) + 4] = list[i]->getStackIndex(); |
| newInst->code[(4*i) + 5] = list[i]->getOffset(); |
| newInst->code[(4*i) + 6] = list[i]->getLen(); |
| newInst->code[(4*i) + 7] = i; |
| } |
| |
| loadOperandsOfInst(newInst); |
| |
| block->insertInstAfter(inst, newInst); |
| block->deleteInst(inst); |
| } |
| } ENDFE_INST_IN_BLOCK |
| } ENDFE_BLOCK |
| |
| |
| // Now reorder operands in hash for better parallel hashing. |
| if (!REORDER_HASH) |
| return; |
| |
| FOREACH_BLOCK(block, firstInst, lastInst, index) { |
| FOREACH_INST_IN_BLOCK(block, inst) { |
| if (inst->getOpcode() == PCIT::HASHCOMB_BULK_MBIN32U) { |
| |
| // Create one list with all the source operands |
| OPLIST list(heap_); |
| |
| list.insert(inst->getROps()); |
| |
| // Go through the list of operands and find pairs with minimal |
| // differences in lengths. These pairs are more suitable for parallel |
| // hashing. Unfortunately, however, because the need to be combined in |
| // the same order as was generated, we need to also keep track of each |
| // operands position (a.k.a combined order index). |
| |
| for (idx = 0; idx != list.entries();) |
| { |
| CollIndex op1=0, op2=0; // Operand positions of ideal hash pairs |
| UInt32 len1, len2; // Lengths of operands 1 and 2 |
| UInt32 min = UINT_MAX; // Init min diff between operand lengths |
| |
| // Go through each operand in list |
| for (i=0; i < list.entries(); i++) { |
| |
| // If operand was already inserted, continue to next operand |
| if (list[i] == NULL) continue; |
| |
| // Initialize op1 and op2 if this is the first legit operand seen |
| if (min == UINT_MAX) { |
| op1 = op2 = i; |
| } |
| |
| // Length of operand 1 |
| len1 = (list[i]->getLen() == 0) ? -1 : list[i]->getLen(); |
| |
| // Now go through the operand list again, but this time starting |
| // with the operand after op1. The difference in lengths will be |
| // calculated here, and then compared to the least difference found |
| // so far. |
| |
| for (j=i+1; j < list.entries(); j++) { |
| |
| // If operand was already inserted, continue to next operand |
| if (list[j] == NULL) continue; |
| |
| // Length of operand 2 |
| len2 = (list[j]->getLen() == 0) ? -1 : list[j]->getLen(); |
| |
| // Difference calculated must be absolute for comparison purposes |
| UInt32 diff = (len1 > len2) ? len1 - len2 : len2 - len1; |
| |
| // If diff is lowest found so far, we found a new hash candidate |
| if (diff < min) { |
| min = diff; |
| |
| // Always make op1 the smaller of the two, since parallel hash |
| // requires this order |
| op1 = (len1 > len2) ? j : i; |
| op2 = (len1 > len2) ? i : j; |
| } |
| } |
| } |
| |
| // At this point, it is only possible that we either found a pair or |
| // we have but one operand left. In either case, we're inserting at |
| // least one operand. Note that we move the index up in the last |
| // store |
| |
| inst->code[(4*idx) + 4] = list[op1]->getStackIndex(); |
| inst->code[(4*idx) + 5] = list[op1]->getOffset(); |
| inst->code[(4*idx) + 6] = list[op1]->getLen(); |
| inst->code[(4*idx++) + 7] = op1; |
| |
| // Since removing entry out of list will mess up the position info of |
| // each operand, replace with NULL, so that entry is side-stepped in |
| // algorithm |
| |
| list.removeAt(op1); |
| list.insertAt(op1, NULL); |
| |
| // If not a pair, then break out |
| if (op1 == op2) |
| break; |
| |
| // Add op2 and then effectively remove it from list |
| |
| inst->code[(4*idx) + 4] = list[op2]->getStackIndex(); |
| inst->code[(4*idx) + 5] = list[op2]->getOffset(); |
| inst->code[(4*idx) + 6] = list[op2]->getLen(); |
| inst->code[(4*idx++) + 7] = op2; |
| |
| list.removeAt(op2); |
| list.insertAt(op2, NULL); |
| } |
| |
| inst->reloadOperands(this); |
| } |
| } ENDFE_INST_IN_BLOCK |
| } ENDFE_BLOCK |
| } |
| |
| // |
| // Performs code motion and instruction grouping across all blocks |
| // |
| void PCodeCfg::codeMotion() |
| { |
| FOREACH_BLOCK(block, firstInst, lastInst, index) { |
| localCodeMotion(block); |
| } ENDFE_BLOCK |
| } |
| |
| // |
| // This function has two purposes. The first is to group like instructions |
| // together so that 1) they can be later combined into "bulk" forms and 2) to |
| // remove any unnecessary dependencies on execution order. The second |
| // purpose of this function is to find and generate genuine bulk moves where |
| // moves of two adjacent columns can be done in one shot. This comes as an |
| // artifact from the NOT_NULL_BRANCH_BULK instruction |
| // |
| void PCodeCfg::localCodeMotion(PCodeBlock* block) |
| { |
| FOREACH_INST_IN_BLOCK(block, inst) { |
| // Group certain instructions as close together as possible |
| if ((inst->getOpcode() == PCIT::RANGE_MFLT64) || |
| (inst->isNullBitmap()) || |
| (inst->isCopyMove() || inst->isConstMove())) |
| { |
| PCodeInst* insertInst = NULL; |
| |
| // Given inst, walk backwards from inst and try to find a spot to move |
| // inst into, taking consideration of waw/raw/war dependency violations |
| FOREACH_INST_IN_BLOCK_BACKWARDS_AT(block, prev, inst->prev) { |
| |
| CollIndex i, j; |
| NABoolean found = FALSE; |
| PCodeInst* nInst; |
| |
| // Moves are grouped in a more intricate way. If both operands are |
| // same-size moves, then we attempt to order them relative to each |
| // other in a way which would expose adjacent/bulk moves. |
| if (inst->isCopyMove() && prev->isCopyMove()) { |
| Int32 pWOff = prev->getWOps()[0]->getOffset(); |
| Int32 pROff = prev->getROps()[0]->getOffset(); |
| Int32 iWOff = inst->getWOps()[0]->getOffset(); |
| Int32 iROff = inst->getROps()[0]->getOffset(); |
| |
| // First group by writes |
| if ((inst->getWOps()[0]->getStackIndex() == |
| prev->getWOps()[0]->getStackIndex()) && |
| (inst->getWOps()[0]->getOffset() >= |
| prev->getWOps()[0]->getOffset())) { |
| |
| // Next group by reads |
| if ((inst->getROps()[0]->getStackIndex() == |
| prev->getROps()[0]->getStackIndex()) && |
| (inst->getROps()[0]->getOffset() >= |
| prev->getROps()[0]->getOffset())) { |
| |
| // Moves are now in order. Check to see if any overlap exists. |
| // Note, overlapping moves should not involve the same tuple for |
| // both source and target, *unless* we're dealing with different |
| // ranges. Since that's unlikely, just check for same tuple. |
| if ((prev->getWOps()[0]->getStackIndex() != |
| prev->getROps()[0]->getStackIndex()) && |
| ((pWOff + prev->getWOps()[0]->getLen()) == iWOff) && |
| ((pROff + prev->getROps()[0]->getLen()) == iROff)) { |
| |
| Int32 len = prev->getROps()[0]->getLen() + |
| inst->getROps()[0]->getLen(); |
| |
| PCIType::Instruction opc; |
| |
| switch (len) { |
| case 2: |
| opc = PCIT::MOVE_MBIN16U_MBIN16U; |
| break; |
| case 4: |
| opc = PCIT::MOVE_MBIN32U_MBIN32U; |
| break; |
| case 8: |
| opc = PCIT::MOVE_MBIN64S_MBIN64S; |
| break; |
| default: |
| opc = PCIT::MOVE_MBIN8_MBIN8_IBIN32S; |
| break; |
| } |
| |
| // Create a move by combining the two moves together |
| nInst = block->insertNewInstBefore(prev, opc); |
| |
| nInst->code[1] = prev->getWOps()[0]->getStackIndex(); |
| nInst->code[2] = pWOff; |
| nInst->code[3] = prev->getROps()[0]->getStackIndex(); |
| nInst->code[4] = pROff; |
| |
| if (opc == PCIT::MOVE_MBIN8_MBIN8_IBIN32S) |
| nInst->code[5] = len; |
| |
| loadOperandsOfInst(nInst); |
| |
| block->deleteInst(inst); |
| block->deleteInst(prev); |
| |
| // We're no longer trying to insert the inst |
| insertInst = NULL; |
| } |
| break; |
| } |
| } |
| } |
| else { |
| // Check moves first |
| if (inst->isConstMove() && |
| (prev->isConstMove() || prev->isCopyMove()) && |
| (inst->getWOps()[0]->getStackIndex() == |
| prev->getWOps()[0]->getStackIndex())) |
| break; |
| |
| // Check all other instructions next |
| if (prev->getOpcode() == inst->getOpcode()) |
| // If prev is the same type of instr as inst, then already grouped. |
| break; |
| } |
| |
| // First check for write-after-write hazards |
| // I.e. inst writes operand that prev writes to. |
| |
| for (i=0; !found && (i < prev->getWOps().entries()); i++) |
| for (j=0; !found && (j < inst->getWOps().entries()); j++) |
| if (inst->getWOps()[j]->canOverlap(prev->getWOps()[i])) |
| found = TRUE; |
| |
| // Now check for read-after-write hazards |
| // I.e. inst reads operand that prev writes to. |
| |
| for (i=0; !found && (i < prev->getWOps().entries()); i++) |
| for (j=0; !found && (j < inst->getROps().entries()); j++) |
| if (inst->getROps()[j]->canOverlap(prev->getWOps()[i])) |
| found = TRUE; |
| |
| // Lastly, check for write-after-read hazards |
| // I.e. prev reads operand that inst writes to. |
| |
| for (i=0; !found && (i < prev->getROps().entries()); i++) |
| for (j=0; !found && (j < inst->getWOps().entries()); j++) |
| if (inst->getWOps()[j]->canOverlap(prev->getROps()[i])) |
| found = TRUE; |
| |
| // If found is true, or then we can't perform the code motion. |
| if (found) |
| break; |
| |
| // It is safe to move the instruction before prev. |
| insertInst = prev; |
| |
| } ENDFE_INST_IN_BLOCK_BACKWARDS_AT |
| |
| // Move the instruction before insertInst |
| if (insertInst != NULL) { |
| |
| // Don't move inst up if no like instruction was found, and is not a |
| // copy move |
| if ((insertInst->prev == NULL) && (!inst->isCopyMove())) |
| continue; |
| |
| block->deleteInst(inst); |
| |
| // Careful inserting since code motion must be done correctly when |
| // thinking about inserting in between opdata/clause-eval pairs |
| if (insertInst->prev) |
| block->insertInstAfter(insertInst->prev, inst); |
| else |
| block->insertInstBefore(insertInst, inst); |
| } |
| } |
| } ENDFE_INST_IN_BLOCK |
| } |
| |
| NABoolean PCodeCfg::removeBulkNullBranch() |
| { |
| Int32 numOfNB = 0; |
| |
| if (!entryBlock_->getLastInst()->isBulkNullBranch()) |
| return FALSE; |
| |
| PCodeBlock* start = entryBlock_->getFallThroughBlock(); |
| |
| FOREACH_BLOCK_REV_DFO_AT(start, block, firstInst, lastInst, index) { |
| if (lastInst && lastInst->isNullBranch()) { |
| // Don't support bulk null checking for indirect varchars |
| if (lastInst->isVarcharNullBranch() && |
| lastInst->containsIndirectVarcharOperand()) |
| continue; |
| |
| numOfNB++; |
| } |
| } ENDFE_BLOCK_REV_DFO |
| |
| if (numOfNB < 2) { |
| deleteBlock(entryBlock_); |
| entryBlock_ = start; |
| |
| return TRUE; |
| } |
| |
| return FALSE; |
| } |
| |
| NABoolean PCodeCfg::bulkNullBranch() |
| { |
| CollIndex i; |
| PCodeBlock* newBlock; |
| PCodeBlock* copyBlocks; |
| PCodeInst* newInst; |
| Int32 length; |
| OPLIST list(heap_); |
| PCodeBlock** map; |
| NABitVector defs(heap_); |
| |
| NABoolean alignedFormat = FALSE; |
| NABoolean packedFormat = FALSE; |
| NABoolean differentRowsFound = FALSE; |
| Int32 bitmapIdx = 0; |
| Int32 bitmapOff = 0; |
| Int32 bitmapMaxNullBitIndex = -1; |
| |
| Int32 numOfNB = 0; // Number of null branch insts in cfg |
| Int32 numOfProcessedNB = 0; // Num of nb insts we can eval early |
| PCodeBlock* startBlock = entryBlock_; |
| |
| FOREACH_BLOCK_REV_DFO(block, firstInst, lastInst, index) { |
| |
| // Add all defs - the purpose of which is described later. |
| FOREACH_INST_IN_BLOCK(block, inst) { |
| for (i=0; i < inst->getWOps().entries(); i++) |
| defs += inst->getWOps()[i]->getBvIndex(); |
| } ENDFE_INST_IN_BLOCK |
| |
| if (lastInst && lastInst->isNullBranch()) { |
| NABoolean checked = FALSE; |
| |
| // Don't support bulk null checking for indirect varchars |
| if (lastInst->isVarcharNullBranch() && |
| lastInst->containsIndirectVarcharOperand()) |
| continue; |
| |
| // Found a null branch |
| numOfNB++; |
| |
| for (i=0; i < lastInst->getROps().entries(); i++) { |
| PCodeOperand* read = lastInst->getROps()[i]; |
| CollIndex rd = read->getBvIndex(); |
| |
| // Only variable read operands should be considered, since those are the |
| // only ones that can be defined upon entry. If this optimization were |
| // to ever change such that the bulk null branch instruction could be |
| // placed somewhere other than the entry block, revisit this. |
| if (!read->isVar()) |
| continue; |
| |
| #if 0 |
| // Old way of doing this using reaching defs |
| |
| PCodeInst* defI = startBlock->reachingDefsTab_->find(rd, TRUE /* In */); |
| PCodeInst* defO = block->reachingDefsTab_->find(rd); |
| |
| // If the read operand of the null branch has a known def that reaches |
| // it from the start block, and that def is not overwritten in the |
| // block containing the null branch, then it serves as a candidate for |
| // code motion for early null detection. Also, don't add the operand |
| // if it already exists in the list |
| |
| if ((defI != NULL) && (defI == defO)) |
| #else |
| // Instead of using costly reaching defs analysis, use the "defs" bit |
| // vector that has been computed up to this point. The idea being that |
| // if this read operand has been written to at any point in this expr, |
| // than it doesn't qualify for this opt. In almost every situation, a |
| // use of var operand in a null-branch check is done without a previous |
| // write to it in the same expr. |
| |
| if (!defs.contains(rd)) |
| #endif |
| { |
| CollIndex j; |
| |
| // Set checked flag indicating that this is a NULL branch to process |
| checked = TRUE; |
| |
| // Don't add operand if it already exists in list |
| for (j=0; j < list.entries(); j++) |
| if (rd == list[j]->getBvIndex()) |
| break; |
| |
| // If operand does not already exist in the list, add it. |
| if (j == list.entries()) |
| list.insert(read); |
| } |
| } |
| |
| if (checked) |
| numOfProcessedNB++; |
| |
| // If there are too many null branches, return. |
| if (numOfNB > 1000) |
| return FALSE; |
| } |
| } ENDFE_BLOCK_REV_DFO |
| |
| // Heuristic - don't do optimization if less than two null branches were found |
| // or if more than 50 unique nulls must be examined. Why the latter? Because |
| // not all 50 nullable columns will likely be NULL for a given row. |
| if ((numOfProcessedNB < 2) || (list.entries() > 50)) |
| return FALSE; |
| |
| // Mark all blocks as unvisited - used in copyCfg() |
| clearVisitedFlags(); |
| |
| // Create a map table used to map copy blocks to original blocks. Make a copy // of the cfg starting from startBlock. |
| |
| map = (PCodeBlock**) new(heap_) PCodeBlock*[maxBlockNum_ + 1]; |
| copyBlocks = copyCfg(startBlock, NULL, map); |
| NADELETEBASIC(map, heap_); |
| |
| // Create new entry block for not-null-branch-bulk |
| newBlock = createBlock(); |
| |
| // Analyze the not-nulls collected to see if and how we should process |
| for (i=0; i < list.entries(); i++) { |
| Int32 nullBitIndex = list[i]->getNullBitIndex(); |
| if (nullBitIndex == -1) |
| packedFormat = TRUE; |
| else |
| alignedFormat = TRUE; |
| |
| bitmapMaxNullBitIndex = ((nullBitIndex > bitmapMaxNullBitIndex) |
| ? nullBitIndex : bitmapMaxNullBitIndex); |
| |
| if (i == 0) { |
| bitmapIdx = list[i]->getStackIndex(); |
| bitmapOff = list[i]->getOffset(); |
| bitmapMaxNullBitIndex = nullBitIndex; |
| } |
| |
| // A different row representing different null indicators for either |
| // formats must show a different stack index - i.e. no need to compare |
| // offsets as well for aligned format bitmaps. |
| if (bitmapIdx != list[i]->getStackIndex()) |
| differentRowsFound = TRUE; |
| } |
| |
| // As a potential pcode bytecode bloat problem, we want to make sure that if |
| // we're dealing with the alignedFormat, then expect to see at least 1 bit |
| // check per byte in the bitmask (i.e. equivalent to a non-parallel check) |
| if (alignedFormat && !packedFormat) { |
| float real = ((float)list.entries() / (float)bitmapMaxNullBitIndex); |
| float limit = (float)1/(float)8; |
| |
| // If limit reached, generate longer NOT-NULL_BRANCH check |
| if (real < limit) |
| differentRowsFound = TRUE; |
| } |
| |
| if (packedFormat && !alignedFormat) { |
| if (differentRowsFound) { |
| // Length of inst is (opc + len + sub-opc + 2*#operands + branch) |
| length = 3 + 2*list.entries() + 1; |
| newInst = createInst(PCIT::NOT_NULL_BRANCH_BULK, length); |
| newInst->code[1] = length; |
| |
| newInst->code[2] = (differentRowsFound) ? 1 : 0; |
| |
| for (i=0; i < list.entries(); i++) { |
| newInst->code[2*i + 3] = list[i]->getStackIndex(); |
| newInst->code[2*i + 4] = list[i]->getOffset(); |
| } |
| } |
| else { |
| // Length of inst is (opc + len + sub-opc + idx + #operands + branch) |
| length = 4 + list.entries() + 1; |
| newInst = createInst(PCIT::NOT_NULL_BRANCH_BULK, length); |
| newInst->code[1] = length; |
| |
| newInst->code[2] = (differentRowsFound) ? 1 : 0; |
| |
| newInst->code[3] = list[0]->getStackIndex(); |
| for (i=0; i < list.entries(); i++) { |
| newInst->code[4+i] = list[i]->getOffset(); |
| } |
| } |
| } |
| else if ((alignedFormat && !packedFormat) && (!differentRowsFound)) { |
| // Length of inst is (opc + len + sub-opc + bitmap-idx + bitmap-off + |
| // bitmap-len + bitmap-mask + branch) |
| length = 6 + ((bitmapMaxNullBitIndex >> 5) + 1) + 1; |
| newInst = createInst(PCIT::NULL_BITMAP_BULK, length); |
| newInst->code[1] = length; |
| |
| newInst->code[2] = (differentRowsFound) ? 1 : 0; |
| |
| newInst->code[3] = bitmapIdx; |
| newInst->code[4] = bitmapOff; |
| newInst->code[5] = (bitmapMaxNullBitIndex >> 3) + 1; |
| |
| // Null out bitmap mask |
| for (i=0; i < (CollIndex)((bitmapMaxNullBitIndex >> 5) + 1); i++) |
| newInst->code[6+i] = 0; |
| |
| for (i=0; i < list.entries(); i++) { |
| Int32 bitIdx = list[i]->getNullBitIndex(); |
| |
| ((char*)(&newInst->code[6]))[bitIdx >> 3] |= |
| ((UInt32)0x1 << (7 - (bitIdx & 7))); |
| } |
| } |
| else { |
| // Length of inst is (opc + len + sub-opc + (3 * #operands) + branch) |
| length = 3 + 3*list.entries() + 1; |
| newInst = createInst(PCIT::NULL_BITMAP_BULK, length); |
| newInst->code[1] = length; |
| |
| newInst->code[2] = (differentRowsFound) ? 1 : 0; |
| |
| for (i=0; i < list.entries(); i++) { |
| newInst->code[3*i + 3] = list[i]->getStackIndex(); |
| newInst->code[3*i + 4] = list[i]->getOffset(); |
| newInst->code[3*i + 5] = list[i]->getNullBitIndex(); |
| } |
| } |
| |
| |
| loadOperandsOfInst(newInst); |
| |
| // Add the bulk not null branch to the end of the new block. Add the |
| // fall-through edge to the startBlock, and add an target edge to the |
| // copy blocks. |
| |
| newBlock->insertInstBefore(NULL, newInst); |
| newBlock->addEdge(startBlock); |
| newBlock->addEdge(copyBlocks); |
| |
| if (startBlock == entryBlock_) |
| entryBlock_ = newBlock; |
| |
| return TRUE; |
| } |
| |
| // |
| // Create bulk move instructions for adjacent moves. |
| // |
| void PCodeCfg::bulkMove(NABoolean sameSizeMoves) |
| { |
| CollIndex i, j, srcStackIndex; |
| PCodeInst* newInst; |
| INSTLIST list(heap_); |
| PCodeInst* lastMoveInst = NULL; |
| |
| FOREACH_BLOCK(block, firstInst, lastInst, index) { |
| |
| // Since we're finding groups of candidate instructions for the bulk move, |
| // we need to restart at the end of each group. Begin at first inst |
| for (PCodeInst* start = firstInst; start; ) { |
| |
| Int32 length = 0; |
| |
| // Clear the bulk move group list; |
| list.clear(); |
| |
| srcStackIndex = 0; |
| |
| // Given a start inst, traverse down the block in search of adjacent |
| // move inst with similar source and target operands. |
| FOREACH_INST_IN_BLOCK_AT(block, inst, start) { |
| |
| // Reset start inst in prep for next group |
| start = inst->next; |
| |
| // Since sameSizeMoves should always be TRUE for now, we only allow |
| // copy moves. |
| assert (sameSizeMoves == TRUE); |
| if (inst->isCopyMove() || inst->isConstMove()) { |
| |
| if (list.isEmpty()) { |
| // This inst will begin the bulk move group. Record the stack |
| // index used for the source, if one exists |
| |
| list.insert(inst); |
| if (inst->getROps().entries() > 0) |
| srcStackIndex = inst->getROps()[0]->getStackIndex(); |
| |
| if (inst->getOpcode() == PCIT::MOVE_MBIN8_MBIN8_IBIN32S) |
| length++; |
| } |
| else if (inst->getWOps()[0]->getStackIndex() == |
| list[0]->getWOps()[0]->getStackIndex()) |
| { |
| // Target operands match, now we just need to make sure that source |
| // operands match, if one exists |
| |
| if (inst->getROps().entries() > 0) { |
| if (srcStackIndex != 0) { |
| if (srcStackIndex != inst->getROps()[0]->getStackIndex()) { |
| start = inst; |
| break; |
| } |
| } |
| else |
| srcStackIndex = inst->getROps()[0]->getStackIndex(); |
| } |
| |
| lastMoveInst = inst; |
| |
| // Insert this inst in the list by grouping them with like instrs. |
| for (i=list.entries()-1; i >= 0; i--) { |
| CollIndex instWIdx, prevWIdx; |
| |
| if (list[i]->getOpcode() == inst->getOpcode()) { |
| list.insertAt(i+1, inst); |
| break; |
| } |
| |
| instWIdx = inst->getWOps()[0]->getBvIndex(); |
| prevWIdx = list[i]->getWOps()[0]->getBvIndex(); |
| |
| if ((i == 0) || |
| (inst->getROps().entries() && |
| (inst->getROps()[0]->getBvIndex() == prevWIdx)) || |
| (list[i]->getROps().entries() && |
| (instWIdx == list[i]->getROps()[0]->getBvIndex())) || |
| (instWIdx == prevWIdx)) |
| { |
| // If no match was found or a dependency violation exists |
| // which prevents the rearrangement, just insert at the end |
| list.insert(inst); |
| break; |
| } |
| } |
| |
| if (inst->getOpcode() == PCIT::MOVE_MBIN8_MBIN8_IBIN32S) |
| length++; |
| } |
| else { |
| start = inst; |
| break; |
| } |
| } |
| else if (!list.isEmpty()) { |
| // If we arrived at a non-move inst and the group has already been |
| // formed, break out |
| break; |
| } |
| } ENDFE_INST_IN_BLOCK |
| |
| // Here we generate the bulk move inst for the group found if a group of |
| // more than 1 moves were found. |
| if (list.entries() > 1) { |
| |
| // Length of inst is (3 * #operands) + opc + len + tgt-idx + src-idx |
| // Each operand will be (opc, tgt-off, src-off), and may include |
| // len of move as well if dealing with PCIT::MOVE_MBIN8_MBIN8_IBIN32S. |
| // Also, we do a "+=" because length is previously incremented while |
| // searching for groups for the car string moves |
| |
| length += 3*list.entries() + 4; |
| |
| // Format of bulk move instruction is this: |
| // |
| // Opc, length, wrt stk idx, src stk idx, [opc1, tgt-off1, src-off1] |
| // [opc2, tgt-off2, src-off2] |
| // .... |
| // |
| // And if we're dealing with a fast move instruction, that will be: |
| // |
| // [opc3, tgt-off3, src-off3, sz] |
| // |
| newInst = createInst(PCIT::MOVE_BULK, length); |
| newInst->code[1] = length; |
| newInst->code[2] = list[0]->getWOps()[0]->getStackIndex(); |
| newInst->code[3] = srcStackIndex; |
| |
| // Record the last inst before we should insert the buld move inst. |
| PCodeInst* indicatorInst = lastMoveInst->next; |
| |
| // Walk through each read operand in the list and insert the appropriate |
| // fields in the pcode byte stream for the bulk move instruction. |
| for (i=0, j=4; i < list.entries(); i++, j+=3) { |
| // Source offset will either be a *real* source offset, or it is the |
| // constant for a constant move |
| Int32 srcOffset = ((list[i]->getROps().entries()) |
| ? list[i]->getROps()[0]->getOffset() |
| : list[i]->code[3]); |
| |
| // Target offset |
| Int32 tgtOffset = list[i]->getWOps()[0]->getOffset(); |
| |
| newInst->code[j] = list[i]->getOpcode(); |
| newInst->code[j+1] = list[i]->getWOps()[0]->getOffset(); |
| newInst->code[j+2] = srcOffset; |
| |
| if (list[i]->getOpcode() == PCIT::MOVE_MBIN8_MBIN8_IBIN32S) { |
| newInst->code[j+3] = list[i]->code[5]; |
| j++; |
| } |
| |
| block->deleteInst(list[i]); |
| } |
| |
| loadOperandsOfInst(newInst); |
| |
| // If indicatorInst is null, that means we want to insert the bulk move |
| // inst at the end of the block. |
| if (indicatorInst) |
| block->insertInstBefore(indicatorInst, newInst); |
| else |
| block->insertInstAfter(NULL, newInst); |
| } |
| } |
| } ENDFE_BLOCK |
| } |