blob: f7f765af1c725c8b7096de3183edb0507ebf4911 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Nikolay A. Sidelnikov
*/
#include "Ia32IRManager.h"
#include "Ia32Inst.h"
#include "Dominator.h"
#include "Ia32CgUtils.h"
namespace Jitrino {
namespace Ia32 {
/**
* Replaces operations with 64 bits integer values with a set of operations
* supported by IA-32 ISA.
*
* I8PseudoInstruction-s, normally generated by InstCodeSelector.
*/
class I8Lowerer :
    public SessionAction,
    protected OpndUtils,
    protected SubCfgBuilderUtils
{
    /**
     * Main entry point of this Action.
     */
    void runImpl(void);
private:
    // Maps a 64-bit operand to a 2-element array of its 32-bit halves:
    // [0] = low part, [1] = high part (see prepareNewOpnds()).
    typedef StlMap<Opnd *,Opnd **> OPND_PAIRS_MAP;
    typedef StlVector<Inst*> INST_ARRAY;
    typedef StlMap<Inst*, Node*> INST_TO_NODE_MAP;
    //
    // virtuals
    //
    // Loop info is required: runImpl() records the loop header of every
    // instruction it collects.
    U_32 getNeedInfo(void) const
    {
        return NeedInfo_LoopInfo;
    }
    U_32 getSideEffects(void) const
    {
        // Simplest presumption - if we found at least one I8PseudoInst
        // we might affect everything, including liveness, loop info
        // and dominator tree
        return foundI8Opnds;
    }
protected:
    /**
     * Dispatches instruction processing, basing on its Mnemonic.
     */
    void processOpnds(Inst * inst);
    /**
     * Splits the long operand up to the 2 32bits operands.
     */
    void prepareNewOpnds(Opnd * longOpnd, Opnd*& lowPartOpnd, Opnd*& hiPartOpnd);
    // Builds CFG sub-graph (or straight-line code for immediate shift counts)
    // implementing a 64-bit shift out of 32-bit shifts; opMnem is the
    // opposite-direction shift used to transfer bits between the halves.
    void buildShiftSubGraph(Inst * inst, Opnd * src1_1, Opnd * src1_2, Opnd * src2, Opnd * dst_1, Opnd * dst_2, Mnemonic mnem, Mnemonic opMnem);
    // Builds a generic 64-bit CMP sub-graph which materializes the comparison
    // result into flags (used when no recognizable Jcc/SETcc/CMOVcc follows).
    void buildComplexSubGraph(Inst * inst, Opnd * src1_1,Opnd * src1_2,Opnd * src2_1,Opnd * src2_2, Inst * condInst = NULL);
    // Builds a 64-bit CMP sub-graph feeding a SETcc/CMOVcc consumer (condInst).
    void buildSetSubGraph(Inst * inst, Opnd * src1_1,Opnd * src1_2,Opnd * src2_1,Opnd * src2_2, Inst * condInst = NULL);
    // Lowers a 64-bit CMP followed by a conditional branch (condInst) into
    // separate hi/lo 32-bit compares and branches.
    void compareAndBranch(Inst * inst, Opnd * src1_1,Opnd * src1_2,Opnd * src2_1,Opnd * src2_2, Inst * condInst = NULL);
    /**
     * Processes I8 IMUL instruction.
     *
     * Depending on a flag, generates either call to internal helper,
     * or generates and inserts sub-graph.
     *
     * @see inlineMul64
     */
    void lowerMul64(Inst* inst);
    /**
     * Processes I8 IDIV instruction.
     *
     * Depending on a flag, generates either call to internal helper,
     * or generates and inserts sub-graph.
     */
    void lowerDiv64(Inst* inst);
    /**
     * Processes I8 remainder instruction.
     *
     * Depending on a flag, generates either call to internal helper,
     * or generates and inserts sub-graph.
     */
    void lowerRem64(Inst* inst);
    /**
     * Generates sub-graph for multiplication of 2 64-bits long values.
     */
    void inlineMul64(Inst* inst);
    /**
     * Generates sub-graph for calculating quotient and remainder of 2
     * 64 bits integer values, using integer instructions.
     */
    void inlineDivRem64(bool wantReminder, Inst* inst);
    /**
     * Tries to propagate and reuse results of inlined Div64 or Rem64
     * instruction, to reduce code size and complex operations in the
     * resulting CFG.
     */
    void propagateDivRemResults(Inst* originalInst, Node* originalInstNode,
        Opnd* quot_lo, Opnd* quot_hi, Opnd* rem_lo, Opnd* rem_hi);
    // Zero until the first I8 operand is seen; set to all-ones by
    // processOpnds() and returned verbatim from getSideEffects().
    U_32 foundI8Opnds;
private:
    /**
     * Tests whether the type is subject for lowering.
     */
    static bool isI8Type(Type * t)
    {
        return t->tag==Type::Int64 || t->tag==Type::UInt64;
    }
    /**
     * Tests whether the given Inst represents I8PseudoInstruction of
     * remainder calculation.
     */
    static bool isI8RemInst(const Inst* inst)
    {
        if (!inst->hasKind(Inst::Kind_I8PseudoInst)) {
            return false;
        }
        if (inst->getMnemonic() != Mnemonic_IDIV) {
            return false;
        }
        if (inst->getOpndCount()<=3) {
            return false;
        }
        // The additional fake parameter shows that in fact
        // we need REMinder, not DIV. See InstCodeSelector
#ifdef _DEBUG
        Opnd* fakeFlag = inst->getOpnd(3);
        // Hard - coded values as they're hard coded in CodeSelector
        // Just to check no one started to use the I8PseudoInst with
        // IDIV mnemonic for anything else.
        assert(isImm(fakeFlag) && fakeFlag->getImmValue()==12345678);
#endif
        return true;
    }
    /**
     * performs original mnemonic action on the new operands
     */
    void applyMnemonic(Inst* inst, Opnd* dst, Opnd* dst_1, Opnd* dst_2,
        Opnd* src1, Opnd* src1_1, Opnd* src1_2,
        Opnd* src2, Opnd* src2_1, Opnd* src2_2);
    /**
     * check if the opnd is a volatile field of long type
     */
    bool isLongVolatileMemoryOpnd(Opnd* opnd);
    /**
     * Points to the list of instructions to process.
     *
     * The pointer points to the local variable in runImpl();
     */
    INST_ARRAY* m_pI8Insts;
    /**
     * Points to map of pairs 64 bits operand => two 32 bit operands.
     *
     * The pointer points to the local variable in runImpl();
     */
    OPND_PAIRS_MAP* m_pairs;
    /**
     * Points to a map of Inst => header of the loop it belongs to.
     *
     * The pointer points to the local variable in runImpl();
     */
    INST_TO_NODE_MAP* m_pLoopInfos;
};
// Help text describing the command-line knobs this action reads via getArg()
// (see lowerMul64/lowerDiv64/lowerRem64 and inlineMul64).
static const char* help =
    "inline_mul64=true/false\n"
    "  default=true. Inlines multiplication of 64 bits longs, instead of generating call to helper.\n"
    "inline_mul64_checks=true/false\n"
    "  default=false. Adds additional checks whether multipliers are indeed I_32 for simpler operation.\n"
    "inline_div64=true/false\n"
    "  default=true. Inlines division of 64 bits longs, instead of generating call to helper.\n"
    "inline_rem64=true/false\n"
    "  default=true. Inlines calculation of remainder of 64 bits longs, instead of generating call to helper.\n"
    "propagate_div_rem=true/false\n"
    "  default=true. Tries to match pairs 'A/B, A%B' and inline only single code sequence.\n"
    "";
// Registers this pass under the name "i8l" in the pipeline action registry.
static ActionFactory <I8Lowerer> _i8l("i8l", help);
//_______________________________________________________________________________________________________________
// I8 operation internal helpers: out-of-line fallbacks called when inlining
// of the corresponding 64-bit operation is disabled (see help text above).
// Each is registered with the STDCALL calling convention in runImpl().
// NOTE(review): 'stdcall__' appears to be a platform macro declared elsewhere
// (likely empty on non-Windows toolchains) - confirm against project headers.
int64 __stdcall imul64(const int64 src1, const int64 src2) stdcall__;
int64 __stdcall imul64(const int64 src1, const int64 src2)
{ return src1*src2; }
int64 __stdcall idiv64(const int64 src1, const int64 src2) stdcall__;
int64 __stdcall idiv64(const int64 src1, const int64 src2)
{ return src1/src2; }
int64 __stdcall irem64(const int64 src1, const int64 src2) stdcall__;
int64 __stdcall irem64(const int64 src1, const int64 src2)
{ return src1%src2; }
// Defined later in this file; verifies IR consistency after lowering.
static void checkIR(IRManager* irm);
void I8Lowerer::runImpl()
{
    // Register the I8 operation internal helpers so the non-inlined
    // lowering paths can emit calls to them.
    irManager->registerInternalHelperInfo("imul64", IRManager::InternalHelperInfo((void*)&imul64,&CallingConvention_STDCALL));
    irManager->registerInternalHelperInfo("idiv64", IRManager::InternalHelperInfo((void*)&idiv64,&CallingConvention_STDCALL));
    irManager->registerInternalHelperInfo("irem64", IRManager::InternalHelperInfo((void*)&irem64,&CallingConvention_STDCALL));
    // Initialize various xxUtils
    setIRManager(irManager);
    ControlFlowGraph* fg = irManager->getFlowGraph();
    // All pass-local bookkeeping lives in this memory manager; the m_*
    // members only point at these locals for the duration of the pass.
    MemoryManager memMgr("i8lowerer");
    StlMap<Opnd *,Opnd **> pairs(memMgr);
    m_pairs = &pairs;
    INST_ARRAY i8Insts(memMgr);
    m_pI8Insts = &i8Insts;
    INST_TO_NODE_MAP loopInfos(memMgr);
    m_pLoopInfos = &loopInfos;
    irManager->calculateOpndStatistics();
    const Nodes* postOrder = &fg->getNodesPostOrder();
    const LoopTree* loopTree = fg->getLoopTree();
    //
    // 1. Walk the CFG, collect Inst-s to be processed.
    // Also, count the defs [for I8] operands
    //
    for (Nodes::const_reverse_iterator it = postOrder->rbegin(), end = postOrder->rend(); it!=end; ++it) {
        Node* node = *it;
        if (!node->isBlockNode()) {
            continue;
        }
        for (Inst* inst = (Inst*)node->getFirstInst(); inst!=NULL; inst = inst->getNextInst()) {
            // I8 pseudo-insts get lowered; CALL/RET/entry-point/alias insts
            // may carry 64-bit operands that must be split into halves too.
            bool doProcess =
                inst->hasKind(Inst::Kind_I8PseudoInst) ||
                inst->getMnemonic()==Mnemonic_CALL ||
                inst->getMnemonic()==Mnemonic_RET ||
                inst->hasKind(Inst::Kind_EntryPointPseudoInst) ||
                inst->hasKind(Inst::Kind_AliasPseudoInst);
            if (doProcess) {
                i8Insts.push_back(inst);
                Node* loopHeader = loopTree->getLoopHeader(node, false);
                loopInfos[inst] = loopHeader;
            }
        }
    }
    // Will be used in propagateDivRem()
    computeDominators();
    //
    // 2. Lower every collected instruction.
    //
    for(StlVector<Inst *>::iterator it = i8Insts.begin(); it != i8Insts.end(); it++) {
        Inst* inst = *it;
        // Processed instructions get replaced with NULL
        if (inst != NULL) {
            processOpnds(inst);
            *it = NULL;
        }
    }
    fg->purgeEmptyNodes();
    fg->purgeUnreachableNodes();
    //
    // 3. Peephole cleanup over the freshly generated code: remove CDQ/AND
    // combinations and redundant 32-bit masks produced by the lowering.
    //
    postOrder = &fg->getNodesPostOrder();
    for (Nodes::const_reverse_iterator it = postOrder->rbegin(), end = postOrder->rend(); it!=end; ++it) {
        Node * node= *it;
        if (!node->isBlockNode()) {
            continue;
        }
        // Pending CDQ whose result may turn out to be dead in this block.
        Inst * cdq = NULL;
        for (Inst* inst = (Inst*)node->getFirstInst(),*nextInst=NULL; inst!=NULL; inst = nextInst) {
            nextInst = inst->getNextInst();
            U_32 defCount = inst->getOpndCount(Inst::OpndRole_InstLevel|Inst::OpndRole_Def);
            U_32 useCount = inst->getOpndCount(Inst::OpndRole_InstLevel|Inst::OpndRole_Use);
            if(inst->getMnemonic() == Mnemonic_CDQ) {
                // A CDQ directly feeding an IDIV is required - skip it.
                if (inst->getNextInst()!=NULL && inst->getNextInst()->getMnemonic() == Mnemonic_IDIV) {
                    continue;
                }
                cdq = inst;
            } else if ( cdq && inst->getMnemonic() == Mnemonic_AND &&
                isZeroImm(inst->getOpnd(defCount+1)) &&
                cdq->getOpnd(0)==inst->getOpnd(defCount)) {
                // 'CDQ; AND dst, 0' is equivalent to 'MOV dst, 0': replace
                // the AND with the MOV and drop the now-dead CDQ.
                Inst * tmpInst = irManager->newCopyPseudoInst(Mnemonic_MOV,inst->getOpnd(0), irManager->newImmOpnd(inst->getOpnd(0)->getType(),0));
                tmpInst->insertAfter(inst);
                inst->unlink();
                inst = tmpInst;
                cdq->unlink();
                cdq = NULL;
            } else if ( inst->getMnemonic() == Mnemonic_AND &&
                isImm(inst->getOpnd(defCount+1)) &&
                inst->getOpnd(defCount+1)->getImmValue() == 0xFFFFFFFF) {
                // Masking a 32-bit value with 0xFFFFFFFF is a plain move.
                Inst * tmpInst = irManager->newCopyPseudoInst(Mnemonic_MOV,inst->getOpnd(0), inst->getOpnd(defCount));
                tmpInst->insertAfter(inst);
                inst->unlink();
                inst = tmpInst;
            } else {
                // Any other use of the CDQ result means it is live - keep it.
                if (cdq) {
                    Opnd* defCDQ = cdq->getOpnd(0);
                    for (U_32 i=defCount;i<defCount+useCount;i++) {
                        if (defCDQ == inst->getOpnd(i)) {
                            cdq = NULL;
                            break;
                        }
                    }
                }
            }
        }
    }
    checkIR(irManager);
}
// Lowers a single collected instruction: splits its 64-bit operands into
// 32-bit halves and either patches the operand list in place (CALL/RET/
// entry-point/alias insts) or dispatches to applyMnemonic() for
// I8 pseudo-insts. Volatile long fields are read/written atomically via
// LOCK CMPXCHG8B sequences.
void I8Lowerer::processOpnds(Inst * inst)
{
    Opnd * newOp1 = NULL, *newOp2 = NULL;
    Opnd * dst_1 = NULL, * dst_2 = NULL, * src1_1 = NULL, * src1_2 = NULL, * src2_1 = NULL, * src2_2 = NULL;
    Mnemonic mn = inst->getMnemonic();
    if (mn==Mnemonic_CALL ||
        mn==Mnemonic_RET ||
        inst->hasKind(Inst::Kind_EntryPointPseudoInst) ||
        inst->hasKind(Inst::Kind_AliasPseudoInst)) {
        // These instructions survive the lowering; only their 64-bit operands
        // are replaced in place with the two 32-bit halves.
        for(U_32 i = 0; i < inst->getOpndCount(); i++) {
            Opnd * opnd = inst->getOpnd(i);
            if (!isI8Type(opnd->getType())) {
                continue;
            }
            // Mark the pass as having side effects (see getSideEffects()).
            foundI8Opnds = ~(U_32)0;
            U_32 roles = inst->getOpndRoles(i);
            if (inst->hasKind(Inst::Kind_AliasPseudoInst)) {
                // For alias pseudo-insts only the used operands are split;
                // both halves are constrained to 32-bit memory.
                if (roles & Inst::OpndRole_Use) {
                    prepareNewOpnds(opnd,newOp1,newOp2);
                    inst->setOpnd(i, newOp1);
                    inst->insertOpnd(i+1, newOp2, roles);
                    inst->setConstraint(i, Constraint(OpndKind_Mem, OpndSize_32));
                    inst->setConstraint(i+1, Constraint(OpndKind_Mem, OpndSize_32));
                    i++; // skip over the half we just inserted
                }
            } else {
                prepareNewOpnds(opnd,newOp1,newOp2);
                inst->setOpnd(i++, newOp1);
                inst->insertOpnd(i, newOp2, roles);
            }
        }
    } else if (inst->hasKind(Inst::Kind_I8PseudoInst)){
        U_32 defCount = inst->getOpndCount(Inst::OpndRole_InstLevel|Inst::OpndRole_Def),
             useCount = inst->getOpndCount(Inst::OpndRole_InstLevel|Inst::OpndRole_Use);
        Opnd * dst  = defCount > 0 ? inst->getOpnd(0) : NULL;
        Opnd * src1 = useCount> 0 ? inst->getOpnd(defCount): NULL;
        Opnd * src2 = useCount> 1 ? inst->getOpnd(defCount+1): NULL;
        bool dstIsLongVolatile = false;
        // IDIV/IMUL split their own operands in lowerMul64/lowerDiv64 etc.
        if (mn!=Mnemonic_IDIV && mn!=Mnemonic_IMUL) {
            if (dst) {
                if(isLongVolatileMemoryOpnd(dst)) {
                    // temporary dst placed on stack will be used in the original instruction
                    // (the halves are constrained to EDX:EAX for the
                    // CMPXCHG8B write-back emitted below)
                    dstIsLongVolatile = true;
                    Type* typeInt32 = irManager->getTypeManager().getInt32Type();
                    Type* typeUInt32 = irManager->getTypeManager().getUInt32Type();
                    dst_2 = irManager->newOpnd(typeInt32, Constraint(RegName_EDX));
                    dst_1 = irManager->newOpnd(typeUInt32, Constraint(RegName_EAX));
                } else {
                    prepareNewOpnds(dst,dst_1,dst_2);
                }
            }
            if (src1) {
                if(isLongVolatileMemoryOpnd(src1)) {
                    // prepare gp regs for CMPXCHG8B
                    Type* typeInt32 = irManager->getTypeManager().getInt32Type();
                    Type* typeUInt32 = irManager->getTypeManager().getUInt32Type();
                    Opnd* edx = irManager->newOpnd(typeInt32, Constraint(RegName_EDX));
                    Opnd* eax = irManager->newOpnd(typeUInt32, Constraint(RegName_EAX));
                    Opnd* ecx = irManager->newOpnd(typeInt32, Constraint(RegName_ECX));
                    Opnd* ebx = irManager->newOpnd(typeUInt32, Constraint(RegName_EBX));
                    Opnd* zero = irManager->newImmOpnd(typeInt32, 0);
                    // With EDX:EAX == ECX:EBX == 0 the CMPXCHG8B either swaps
                    // 0 for 0 or just loads the current value into EDX:EAX -
                    // an atomic 64-bit read either way.
                    irManager->newCopyPseudoInst(Mnemonic_MOV, edx, zero)->insertBefore(inst);
                    irManager->newCopyPseudoInst(Mnemonic_MOV, eax, zero)->insertBefore(inst);
                    irManager->newCopyPseudoInst(Mnemonic_MOV, ecx, edx)->insertBefore(inst);
                    irManager->newCopyPseudoInst(Mnemonic_MOV, ebx, eax)->insertBefore(inst);
                    // read src1 -> EDX:EAX regs using CMPXCHG8B inst
                    Inst* cmpxchg = irManager->newCMPXCHG8BPseudoInst(src1,edx,eax,ecx,ebx);
                    cmpxchg->setPrefix(InstPrefix_LOCK);
                    cmpxchg->insertBefore(inst);
                    // let src1_1:src1_2 be EAX:EDX
                    src1_1 = eax;
                    src1_2 = edx;
                } else {
                    prepareNewOpnds(src1,src1_1,src1_2);
                }
            }
            if (src2) {
                if(isLongVolatileMemoryOpnd(src2)) {
                    // prepare gp regs for CMPXCHG8B (same atomic-read trick
                    // as for src1 above)
                    Type* typeInt32 = irManager->getTypeManager().getInt32Type();
                    Type* typeUInt32 = irManager->getTypeManager().getUInt32Type();
                    Opnd* edx = irManager->newOpnd(typeInt32, Constraint(RegName_EDX));
                    Opnd* eax = irManager->newOpnd(typeUInt32, Constraint(RegName_EAX));
                    Opnd* ecx = irManager->newOpnd(typeInt32, Constraint(RegName_ECX));
                    Opnd* ebx = irManager->newOpnd(typeUInt32, Constraint(RegName_EBX));
                    Opnd* zero = irManager->newImmOpnd(typeInt32, 0);
                    irManager->newCopyPseudoInst(Mnemonic_MOV, edx, zero)->insertBefore(inst);
                    irManager->newCopyPseudoInst(Mnemonic_MOV, eax, zero)->insertBefore(inst);
                    irManager->newCopyPseudoInst(Mnemonic_MOV, ecx, edx)->insertBefore(inst);
                    irManager->newCopyPseudoInst(Mnemonic_MOV, ebx, eax)->insertBefore(inst);
                    // read src2 -> EDX:EAX regs using CMPXCHG8B inst
                    Inst* cmpxchg = irManager->newCMPXCHG8BPseudoInst(src2,edx,eax,ecx,ebx);
                    cmpxchg->setPrefix(InstPrefix_LOCK);
                    cmpxchg->insertBefore(inst);
                    // let src2_1:src2_2 be EAX:EDX
                    src2_1 = eax;
                    src2_2 = edx;
                } else {
                    prepareNewOpnds(src2,src2_1,src2_2);
                }
            }
        }
        applyMnemonic(inst,dst,dst_1,dst_2,src1,src1_1,src1_2,src2,src2_1,src2_2);
        if(dstIsLongVolatile) {
            // Write the result atomically: loop on LOCK CMPXCHG8B until the
            // compare-exchange of dst succeeds (JNZ back to the loop node).
            // prepare gp regs for CMPXCHG8B
            Type* typeInt32 = irManager->getTypeManager().getInt32Type();
            Type* typeUInt32 = irManager->getTypeManager().getUInt32Type();
            Opnd* ecx = irManager->newOpnd(typeInt32, Constraint(RegName_ECX));
            Opnd* ebx = irManager->newOpnd(typeUInt32, Constraint(RegName_EBX));
            irManager->newCopyPseudoInst(Mnemonic_MOV, ecx, dst_2)->insertBefore(inst);
            irManager->newCopyPseudoInst(Mnemonic_MOV, ebx, dst_1)->insertBefore(inst);
            ControlFlowGraph * fg = irManager->getFlowGraph();
            Node* prevNode = inst->getNode();
            Node* nextNode = fg->splitNodeAtInstruction(inst, true, true, NULL);
            Node* loopNode = fg->createNode(Node::Kind_Block);
            Edge* prevOut = prevNode->getOutEdges().front();
            fg->replaceEdgeTarget(prevOut, loopNode);
            // write ECX:EBX -> dst using CMPXCHG8B inst
            Inst* cmpxchg = irManager->newCMPXCHG8BPseudoInst(dst,dst_2,dst_1,ecx,ebx);
            cmpxchg->setPrefix(InstPrefix_LOCK);
            loopNode->appendInst(cmpxchg);
            loopNode->appendInst(irManager->newBranchInst(Mnemonic_JNZ, loopNode, nextNode));
            fg->addEdge(loopNode, loopNode, 0.5);
            fg->addEdge(loopNode, nextNode, 0.5);
        }
        // The original pseudo-inst has been fully replaced - remove it.
        inst->unlink();
    }
}
// Returns true iff the operand is a 64-bit integer memory operand whose
// displacement refers to a field (instance or static) marked volatile.
// Such operands must be accessed atomically via CMPXCHG8B.
bool I8Lowerer::isLongVolatileMemoryOpnd(Opnd* opnd) {
    // Only 64-bit integer operands are candidates.
    if (!isI8Type(opnd->getType())) {
        return false;
    }
    // A field access shows up as a memory operand whose displacement
    // sub-operand carries runtime info about the field.
    Opnd* disp = NULL;
    if (opnd->isPlacedIn(OpndKind_Memory)) {
        disp = opnd->getMemOpndSubOpnd(MemOpndSubOpndKind_Displacement);
    }
    if (disp == NULL) {
        return false;
    }
    Opnd::RuntimeInfo* ri = disp->getRuntimeInfo();
    if (ri == NULL) {
        return false;
    }
    Opnd::RuntimeInfo::Kind riKind = ri->getKind();
    bool isFieldRef = (riKind == Opnd::RuntimeInfo::Kind_FieldOffset) ||
                      (riKind == Opnd::RuntimeInfo::Kind_StaticFieldAddress);
    if (!isFieldRef) {
        return false;
    }
    return ((FieldDesc*)ri->getValue(0))->isVolatile();
}
// Emits the 32-bit instruction sequence implementing the original 64-bit
// mnemonic of 'inst' on the split operands. xxx_1 are the low halves,
// xxx_2 the high halves (NULL where the operand does not exist or was not
// split). The caller (processOpnds) unlinks 'inst' afterwards.
void I8Lowerer::applyMnemonic(Inst* inst, Opnd* dst, Opnd* dst_1, Opnd* dst_2,
    Opnd* src1, Opnd* src1_1, Opnd* src1_2,
    Opnd* src2, Opnd* src2_1, Opnd* src2_2)
{
    Mnemonic mn = inst->getMnemonic();
    switch(mn) {
    case Mnemonic_ADD :
        // 64-bit add: ADD on the low halves, ADC (add-with-carry) on the high.
        assert(dst_1 && src1_1 && src2_1);
        assert(dst_2 && src1_2 && src2_2);
        irManager->newInstEx(Mnemonic_ADD, 1, dst_1, src1_1, src2_1)->insertBefore(inst);
        irManager->newInstEx(Mnemonic_ADC, 1, dst_2, src1_2, src2_2)->insertBefore(inst);
        break;
    case Mnemonic_SUB :
        // 64-bit subtract: SUB low, SBB (subtract-with-borrow) high.
        assert(dst_1 && src1_1 && src2_1);
        assert(dst_2 && src1_2 && src2_2);
        irManager->newInstEx(Mnemonic_SUB, 1, dst_1, src1_1, src2_1)->insertBefore(inst);
        irManager->newInstEx(Mnemonic_SBB, 1, dst_2, src1_2, src2_2)->insertBefore(inst);
        break;
    case Mnemonic_AND :
    case Mnemonic_OR :
    case Mnemonic_XOR :
        assert(dst_1 && src1_1 && src2_1);
        assert(dst_2 && src1_2 && src2_2);
        // Intentional fallthrough: bitwise ops apply independently to each
        // half, so AND/OR/XOR share the emission code with NOT below.
    case Mnemonic_NOT :
        assert(dst_1 && src1_1);
        assert(dst_2 && src1_2);
        // NOTE(review): for NOT, src2_1/src2_2 are NULL here - presumably
        // newInstEx ignores trailing NULL operands; confirm against its impl.
        irManager->newInstEx(mn, 1, dst_1, src1_1, src2_1)->insertBefore(inst);
        irManager->newInstEx(mn, 1, dst_2, src1_2, src2_2)->insertBefore(inst);
        break;
    case Mnemonic_MOV :
        assert(dst_1 && src1_1);
        irManager->newCopyPseudoInst(Mnemonic_MOV, dst_1, src1_1)->insertBefore(inst);
        if (dst_2 && src1_2) {
            irManager->newCopyPseudoInst(Mnemonic_MOV, dst_2, src1_2)->insertBefore(inst);
        }
        break;
    case Mnemonic_MOVSX :
    case Mnemonic_MOVZX :
        // Widening move from a <=32-bit source into a 64-bit destination.
        assert(dst_1 && dst_2 && src1_1);
        assert(!src1_2);
        if (src1_1->getSize()<OpndSize_32){
            irManager->newInstEx(mn, 1, dst_1, src1_1)->insertBefore(inst);
        }else{
            assert(src1_1->getSize()==OpndSize_32);
            irManager->newInstEx(Mnemonic_MOV, 1, dst_1, src1_1)->insertBefore(inst);
        }
        if (mn==Mnemonic_MOVSX){
            // It's possible to substitute complex CDQ with a tight
            // constraints to the set of simpler instructions
            // with a wider constraints to let more freedom
            // to regalloc and constraint resolver.
            // However, this seems does not change anything currently,
            // so leaving as-is.
            //test low, low
            //setns hi ; if lo is positive, then load 1 into hi
            //sub hi, 1 ; if lo is positive, then hi is now '0'. otherwise, it's -1
            irManager->newInstEx(Mnemonic_CDQ, 1, dst_2, dst_1)->insertBefore(inst);
        } else {
            //fill upper word with 0
            assert(mn == Mnemonic_MOVZX);
            Opnd* imm0=irManager->newImmOpnd(irManager->getTypeManager().getInt32Type(), 0);
            irManager->newInstEx(Mnemonic_MOV, 1, dst_2, imm0)->insertBefore(inst);
        }
        break;
    case Mnemonic_PUSH :
        // High half pushed first so the value has little-endian layout
        // (low half at the lower address) on the stack.
        assert(src1_1);
        assert(src1_2);
        irManager->newInstEx(Mnemonic_PUSH, 0, src1_2)->insertBefore(inst);
        irManager->newInstEx(Mnemonic_PUSH, 0, src1_1)->insertBefore(inst);
        break;
    case Mnemonic_POP :
        // Mirrors PUSH above: low half comes off the stack first.
        assert(dst_1);
        assert(dst_2);
        irManager->newInstEx(Mnemonic_POP, 1, dst_1)->insertBefore(inst);
        irManager->newInstEx(Mnemonic_POP, 1, dst_2)->insertBefore(inst);
        break;
    case Mnemonic_SHL :
        {
        // Left shift: halves are passed swapped relative to SHR/SAR so
        // buildShiftSubGraph can treat both directions uniformly.
        assert(dst && src1 && src2);
        buildShiftSubGraph(inst, src1_2, src1_1, src2, dst_2, dst_1, mn, Mnemonic_SHR);
        break;
        }
    case Mnemonic_SHR :
    case Mnemonic_SAR :
        {
        assert(dst && src1 && src2);
        buildShiftSubGraph(inst, src1_1, src1_2, src2, dst_1, dst_2, mn, Mnemonic_SHL);
        break;
        }
    case Mnemonic_CMP :
        {
        assert(src1 && src2);
        // Find the consumer of the comparison result, skipping interleaved
        // MOVs; the lowering strategy depends on whether it is a branch,
        // a SETcc/CMOVcc, or something unrecognized.
        Inst * condInst = inst->getNextInst();
        while (condInst && condInst->getMnemonic() == Mnemonic_MOV) {
            condInst = condInst->getNextInst();
        }
        Mnemonic mnem = condInst ? getBaseConditionMnemonic(condInst->getMnemonic()) : Mnemonic_NULL;
        if (mnem != Mnemonic_NULL) {
            if(condInst->hasKind(Inst::Kind_BranchInst)) {
                compareAndBranch(inst,src1_1,src1_2,src2_1,src2_2,condInst);
            } else {
                buildSetSubGraph(inst,src1_1,src1_2,src2_1,src2_2,condInst);
            }
        } else {
            buildComplexSubGraph(inst,src1_1,src1_2,src2_1,src2_2);
        }
        break;
        }
    case Mnemonic_IMUL:
        lowerMul64(inst);
        break;
    case Mnemonic_IDIV:
        // The IDIV pseudo-inst doubles as remainder when tagged with the
        // extra fake operand (see isI8RemInst).
        if (isI8RemInst(inst)) {
            lowerRem64(inst);
        }
        else {
            lowerDiv64(inst);
        }
        break;
    default :
        assert(0);
    }
} // applyMnemonic
// Implements a 64-bit shift out of 32-bit shifts. Parameter naming is
// direction-relative (see the SHL case in applyMnemonic, which passes the
// halves swapped): src1_1/dst_1 is the half that receives bits shifted out
// of src1_2/dst_2, and opMnem is the opposite-direction shift used to
// extract those carried-over bits. Immediate shift counts produce
// straight-line code; a variable count produces a small sub-CFG that
// dispatches on count==0, count in [1,31], and count>31 at runtime.
// The count is masked to 6 bits (0..63), matching IA-32 shift semantics
// for the composed 64-bit operation.
void I8Lowerer::buildShiftSubGraph(Inst * inst, Opnd * src1_1, Opnd * src1_2, Opnd * src2, Opnd * dst_1, Opnd * dst_2, Mnemonic mnem, Mnemonic opMnem) {
    Opnd * dst1_1 = irManager->newOpnd(dst_2->getType()),
         * dst1_2 = irManager->newOpnd(dst_2->getType()),
         * tmpSrc2 = irManager->newOpnd(src2->getType());
    if(src2->isPlacedIn(OpndKind_Imm)) {
        // Immediate count: pick the exact sequence at compile time.
        int64 immVal = src2->getImmValue();
        immVal &= 63;
        if (immVal == 32) {
            // Whole-word shift: move one half into the other.
            irManager->newCopyPseudoInst(Mnemonic_MOV, dst_1, src1_2)->insertBefore(inst);
            if (mnem != Mnemonic_SAR) {
                // Vacated half becomes zero for SHL/SHR.
                irManager->newCopyPseudoInst(Mnemonic_MOV, dst_2, irManager->newImmOpnd(src1_1->getType(), 0))->insertBefore(inst);
            } else {
                // SAR: vacated half is filled with the sign (SAR by 31).
                irManager->newInstEx(mnem, 1, dst_2, src1_2, irManager->newImmOpnd(src2->getType(), 31))->insertBefore(inst);
            }
        } else if(immVal > 31) {
            // Count 33..63: result lives entirely in one half.
            if (mnem != Mnemonic_SAR) {
                irManager->newCopyPseudoInst(Mnemonic_MOV, dst_2, irManager->newImmOpnd(src1_1->getType(), 0))->insertBefore(inst);
            } else {
                irManager->newInstEx(mnem, 1, dst_2, src1_2, irManager->newImmOpnd(src2->getType(), 31))->insertBefore(inst);
            }
            // HW masks the count to 5 bits, so shifting by src2 here
            // effectively shifts by (count - 32).
            irManager->newInstEx(mnem, 1, dst_1, src1_2, src2)->insertBefore(inst);
        } else if (immVal == 0) {
            // Shift by zero: plain copy of both halves.
            irManager->newCopyPseudoInst(Mnemonic_MOV, dst_2, src1_2)->insertBefore(inst);
            irManager->newCopyPseudoInst(Mnemonic_MOV, dst_1, src1_1)->insertBefore(inst);
        } else {
            // Count 1..31: shift both halves and OR the bits carried across
            // the half boundary (extracted with the opposite shift by 32-n).
            irManager->newInstEx(mnem == Mnemonic_SAR?Mnemonic_SHR:mnem, 1, dst1_1, src1_1, src2)->insertBefore(inst);
            irManager->newInstEx(opMnem, 1, dst1_2, src1_2, irManager->newImmOpnd(src2->getType(), 32-immVal))->insertBefore(inst);
            irManager->newInstEx(Mnemonic_OR, 1, dst_1, dst1_2, dst1_1)->insertBefore(inst);
            irManager->newInstEx(mnem, 1, dst_2, src1_2, src2)->insertBefore(inst);
        }
    } else {
        // Variable count: build a sub-CFG that branches on the masked count.
        ControlFlowGraph* subCFG = irManager->createSubCFG(true, false);
        Node* bbMain = subCFG->getEntryNode(),       // masks count, tests <=31
            * lNode = subCFG->createBlockNode(),     // count in [1,31]
            * gNode = subCFG->createBlockNode(),     // count > 31
            * nullNode = subCFG->createBlockNode(),  // count == 0
            * cmpNullNode = subCFG->createBlockNode();
        bbMain->appendInst(irManager->newInstEx(Mnemonic_AND, 1, src2, src2, irManager->newImmOpnd(src2->getType(), 63)));
        bbMain->appendInst(irManager->newInst(Mnemonic_CMP, src2, irManager->newImmOpnd(src2->getType(), 31)));
        bbMain->appendInst(irManager->newBranchInst(getMnemonic(Mnemonic_Jcc, ConditionMnemonic_LE), cmpNullNode, gNode));
        cmpNullNode->appendInst(irManager->newInst(Mnemonic_CMP, src2, irManager->newImmOpnd(src2->getType(), 0)));
        cmpNullNode->appendInst(irManager->newBranchInst(getMnemonic(Mnemonic_Jcc, ConditionMnemonic_E), nullNode, lNode));
        // count == 0: copy through.
        nullNode->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, dst_1, src1_1));
        nullNode->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, dst_2, src1_2));
        // count > 31: one half is all sign bits (SAR) or zero; the other
        // gets the source half shifted by (count mod 32).
        if (mnem == Mnemonic_SAR)
            gNode->appendInst(irManager->newInstEx(mnem, 1, dst_2, src1_2, irManager->newImmOpnd(src2->getType(), 31)));
        else
            gNode->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, dst_2, irManager->newImmOpnd(dst_2->getType(), 0)));
        gNode->appendInst(irManager->newInstEx(mnem == Mnemonic_SAR?Mnemonic_SHR:mnem, 1, dst_1, src1_2, src2));
        // count in [1,31]: same OR-of-carried-bits scheme as the immediate
        // case, with 32-count computed at runtime.
        lNode->appendInst(irManager->newInstEx(mnem == Mnemonic_SAR?Mnemonic_SHR:mnem, 1, dst1_1, src1_1, src2));
        lNode->appendInst(irManager->newInstEx(Mnemonic_SUB, 1, tmpSrc2, irManager->newImmOpnd(src2->getType(), 32), src2));
        lNode->appendInst(irManager->newInstEx(opMnem, 1, dst1_2, src1_2, tmpSrc2));
        lNode->appendInst(irManager->newInstEx(Mnemonic_OR, 1, dst_1, dst1_2, dst1_1));
        lNode->appendInst(irManager->newInstEx(mnem, 1, dst_2, src1_2, src2));
        Node* sinkNode = subCFG->getReturnNode();
        subCFG->addEdge(bbMain, cmpNullNode, 0.5);
        subCFG->addEdge(bbMain, gNode, 0.5);
        subCFG->addEdge(cmpNullNode, nullNode, 0.5);
        subCFG->addEdge(cmpNullNode, lNode, 0.5);
        subCFG->addEdge(gNode, sinkNode, 1);
        subCFG->addEdge(lNode, sinkNode, 1);
        subCFG->addEdge(nullNode, sinkNode, 1);
        irManager->getFlowGraph()->spliceFlowGraphInline(inst, *subCFG);
    }
}
// Lowers a 64-bit CMP whose consumer is the conditional branch 'condInst':
// rewrites the branch into separate 32-bit compares/branches over the
// halves. 'inst' is the CMP pseudo-inst (unlinked later by processOpnds);
// condInst is unlinked here.
void I8Lowerer::compareAndBranch(Inst * inst, Opnd * src1_1,Opnd * src1_2,Opnd * src2_1,Opnd * src2_2, Inst * condInst) {
    ControlFlowGraph* cfg = irManager->getFlowGraph();
    Node * bb = inst->getNode();
    Edge* dbEdge = bb->getTrueEdge();   // direct-branch (taken) edge
    Edge* ftEdge = bb->getFalseEdge();  // fall-through edge
    Node* bbDB = dbEdge->getTargetNode();
    Node* bbFT = ftEdge->getTargetNode();
    Mnemonic mn = condInst->getMnemonic();
    //for == and != algorithms checks low parts first
    if (mn == Mnemonic_JNZ) { // !=
        Node* cmpHiNode = cfg->createBlockNode();
        cfg->replaceEdgeTarget(ftEdge, cmpHiNode, true);
        //if (lo1!=lo2) return true;
        bb->appendInst(irManager->newInst(Mnemonic_CMP, src1_1, src2_1));
        bb->appendInst(irManager->newBranchInst(Mnemonic_JNZ, bbDB, cmpHiNode));
        //if (hi1!=hi2) return true;
        cmpHiNode->appendInst(irManager->newInst(Mnemonic_CMP, src1_2, src2_2));
        cmpHiNode->appendInst(irManager->newBranchInst(Mnemonic_JNZ, bbDB, bbFT));
        cfg->addEdge(cmpHiNode, bbDB, dbEdge->getEdgeProb());
        cfg->addEdge(cmpHiNode, bbFT, ftEdge->getEdgeProb());
    } else if (mn == Mnemonic_JZ) { // ==
        Node* cmpHiNode = cfg->createBlockNode();
        cfg->replaceEdgeTarget(dbEdge, cmpHiNode, true);
        //if lo1==lo2 check hi parts
        bb->appendInst(irManager->newInst(Mnemonic_CMP, src1_1, src2_1));
        bb->appendInst(irManager->newBranchInst(Mnemonic_JZ, cmpHiNode, bbFT));
        //if hi1==hi2 -> return true
        cmpHiNode->appendInst(irManager->newInst(Mnemonic_CMP, src1_2, src2_2));
        cmpHiNode->appendInst(irManager->newBranchInst(Mnemonic_JZ, bbDB, bbFT));
        cfg->addEdge(cmpHiNode, bbDB, dbEdge->getEdgeProb());
        cfg->addEdge(cmpHiNode, bbFT, ftEdge->getEdgeProb());
    } else { // >=, >, <=, <
        //if hi parts equals -> compare low parts
        //else compare hi parts
        Node* cmpHiNode = cfg->createBlockNode();
        Node* cmpLoNode = cfg->createBlockNode();
        //check if hi parts are equal
        cfg->replaceEdgeTarget(dbEdge, cmpHiNode, true);
        cfg->replaceEdgeTarget(ftEdge, cmpLoNode, true);
        bb->appendInst(irManager->newInst(Mnemonic_CMP, src1_2, src2_2));
        bb->appendInst(irManager->newBranchInst(Mnemonic_JNE, cmpHiNode, cmpLoNode));
        //check hi parts: reuse old CMP result here
        cmpHiNode->appendInst(irManager->newBranchInst(mn, bbDB, bbFT));
        cfg->addEdge(cmpHiNode, bbDB, dbEdge->getEdgeProb());
        cfg->addEdge(cmpHiNode, bbFT, ftEdge->getEdgeProb());
        //check lo parts, use unsigned comparison
        // (the low halves compare as unsigned 32-bit values regardless of
        // the signedness of the full 64-bit comparison)
        Mnemonic unsignedMn = Mnemonic_NULL;
        switch(mn) {
        case Mnemonic_JL : unsignedMn = Mnemonic_JB; break;
        case Mnemonic_JLE : unsignedMn = Mnemonic_JBE; break;
        case Mnemonic_JG : unsignedMn = Mnemonic_JA; break;
        case Mnemonic_JGE : unsignedMn = Mnemonic_JAE; break;
        default: unsignedMn = mn; break;
        }
        cmpLoNode->appendInst(irManager->newInst(Mnemonic_CMP, src1_1, src2_1));
        cmpLoNode->appendInst(irManager->newBranchInst(unsignedMn, bbDB, bbFT));
        cfg->addEdge(cmpLoNode, bbDB, dbEdge->getEdgeProb());
        cfg->addEdge(cmpLoNode, bbFT, ftEdge->getEdgeProb());
    }
    condInst->unlink();
}
// Lowers a 64-bit CMP whose consumer is a SETcc/CMOVcc ('condInst'):
// splices in a sub-CFG that compares the high halves first and, only when
// they are equal, the low halves (with the condition weakened to its
// unsigned counterpart for the low compare). The SETcc/CMOVcc is duplicated
// into both paths; any MOV between the CMP and the consumer is duplicated
// too.
void I8Lowerer::buildSetSubGraph(Inst * inst, Opnd * src1_1,Opnd * src1_2,Opnd * src2_1,Opnd * src2_2, Inst * condInst) {
    ControlFlowGraph* subCFG = irManager->createSubCFG(true, false);
    Node* bbHMain = subCFG->getEntryNode();     // compares high halves
    Node* bbHF = subCFG->createBlockNode();     // high halves differ
    Node* bbLMain = subCFG->createBlockNode();  // high halves equal -> low cmp
    Node* sinkNode = subCFG->getReturnNode();
    bbHMain->appendInst(irManager->newInst(Mnemonic_CMP, src1_2, src2_2));
    bbHMain->appendInst(irManager->newBranchInst(Mnemonic_JNZ, bbHF, bbLMain));
    bbLMain->appendInst(irManager->newInst(Mnemonic_CMP, src1_1, src2_1));
    // Cut the original block right after the consumer so the sub-CFG can be
    // spliced in its place.
    irManager->getFlowGraph()->splitNodeAtInstruction(condInst, true, true, NULL);
    Mnemonic mnem = getBaseConditionMnemonic(condInst->getMnemonic());
    Inst * nextInst = inst->getNextInst();
    // If a MOV sits between CMP and the consumer, replicate it on both paths.
    if(nextInst->getMnemonic() == Mnemonic_MOV) {
        bbHF->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, nextInst->getOpnd(0), nextInst->getOpnd(1)));
        bbLMain->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, nextInst->getOpnd(0), nextInst->getOpnd(1)));
        nextInst->unlink();
    } else {
        assert(nextInst == condInst);
    }
    // High-halves-differ path keeps the original (signed) condition.
    bbHF->appendInst(irManager->newInst(condInst->getMnemonic(), condInst->getOpnd(0), mnem == Mnemonic_CMOVcc? condInst->getOpnd(1): NULL));
    // Low-halves path uses the unsigned counterpart of the condition.
    ConditionMnemonic condMnem = ConditionMnemonic(condInst->getMnemonic()-mnem);
    switch(condMnem) {
    case ConditionMnemonic_L : condMnem = ConditionMnemonic_B; break;
    case ConditionMnemonic_LE : condMnem = ConditionMnemonic_BE; break;
    case ConditionMnemonic_G : condMnem = ConditionMnemonic_A; break;
    case ConditionMnemonic_GE : condMnem = ConditionMnemonic_AE; break;
    default: break;
    }
    bbLMain->appendInst(irManager->newInst(Mnemonic(mnem+condMnem), condInst->getOpnd(0), mnem == Mnemonic_CMOVcc? condInst->getOpnd(1): NULL));
    subCFG->addEdge(bbHMain, bbHF, 0.1);
    subCFG->addEdge(bbHMain, bbLMain, 0.9);
    subCFG->addEdge(bbLMain, sinkNode, 1);
    subCFG->addEdge(bbHF, sinkNode, 1);
    condInst->unlink();
    irManager->getFlowGraph()->spliceFlowGraphInline(inst, *subCFG);
}
// Lowers a 64-bit CMP with no recognizable consumer: computes a three-way
// comparison result (-1/0/1) into a temporary via a spliced sub-CFG, then
// re-materializes the EFLAGS state with a final 'CMP tmp, 0' in the sink
// node so whatever follows can still read the flags. 'condInst' is unused
// here (kept for signature symmetry with the sibling builders).
void I8Lowerer::buildComplexSubGraph(Inst * inst, Opnd * src1_1,Opnd * src1_2,Opnd * src2_1,Opnd * src2_2, Inst * condInst) {
    // Holds the three-way comparison outcome: -1 (less), 0 (equal), 1 (greater).
    Opnd * dst_1 = irManager->newOpnd(irManager->getTypeManager().getInt32Type());
    ControlFlowGraph* subCFG = irManager->createSubCFG(true, false);
    Node* bbHMain = subCFG->getEntryNode();     // compare high halves
    Node* bbHF = subCFG->createBlockNode();     // high halves differ
    Node* bbHS = subCFG->createBlockNode();     // high: src1 > src2
    Node* bbLMain = subCFG->createBlockNode();  // high equal -> compare low
    Node* bbLF = subCFG->createBlockNode();     // low halves differ
    Node* bbLS = subCFG->createBlockNode();     // low: src1 > src2 (unsigned)
    Node* sinkNode = subCFG->getReturnNode();
    bbHMain->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, dst_1, irManager->newImmOpnd(irManager->getTypeManager().getInt32Type(), 0)));
    bbHMain->appendInst(irManager->newInst(Mnemonic_CMP, src1_2, src2_2));
    bbHMain->appendInst(irManager->newBranchInst(Mnemonic_JZ, bbLMain, bbHF));
    // High halves differ: assume 'less', correct to 'greater' via signed JG.
    bbHF->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, dst_1, irManager->newImmOpnd(irManager->getTypeManager().getInt32Type(), -1)));
    bbHF->appendInst(irManager->newBranchInst(Mnemonic_JG, bbHS, sinkNode));
    bbHS->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, dst_1, irManager->newImmOpnd(irManager->getTypeManager().getInt32Type(), 1)));
    bbLMain->appendInst(irManager->newInst(Mnemonic_CMP, src1_1, src2_1));
    bbLMain->appendInst(irManager->newBranchInst(Mnemonic_JZ, sinkNode, bbLF));
    // Low halves differ: unsigned comparison (JA) decides greater/less.
    bbLF->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, dst_1, irManager->newImmOpnd(irManager->getTypeManager().getInt32Type(), -1)));
    bbLF->appendInst(irManager->newBranchInst(Mnemonic_JA, bbLS, sinkNode));
    bbLS->appendInst(irManager->newCopyPseudoInst(Mnemonic_MOV, dst_1, irManager->newImmOpnd(irManager->getTypeManager().getInt32Type(), 1)));
    // Re-establish flags from the three-way result for downstream consumers.
    sinkNode->appendInst(irManager->newInst(Mnemonic_CMP, dst_1, irManager->newImmOpnd(irManager->getTypeManager().getInt32Type(), 0)));
    subCFG->addEdge(bbHMain, bbHF, 0.1);
    subCFG->addEdge(bbHMain, bbLMain, 0.9);
    subCFG->addEdge(bbHF, sinkNode, 0.5);
    subCFG->addEdge(bbHF, bbHS, 0.5);
    subCFG->addEdge(bbHS, sinkNode, 1);
    subCFG->addEdge(bbLMain, bbLF, 0.1);
    subCFG->addEdge(bbLMain, sinkNode, 0.9);
    subCFG->addEdge(bbLF, sinkNode, 0.5);
    subCFG->addEdge(bbLF, bbLS, 0.5);
    subCFG->addEdge(bbLS, sinkNode, 1);
    irManager->getFlowGraph()->spliceFlowGraphInline(inst, *subCFG);
}
// Splits a 64-bit operand into two 32-bit halves: newOp1 receives the low
// (unsigned) part, newOp2 the high (signed) part. Non-I8 operands pass
// through unchanged with newOp2 == NULL. Each 64-bit operand is split only
// once; the resulting pair is cached in *m_pairs and reused on subsequent
// calls so all lowered instructions see the same halves.
void I8Lowerer::prepareNewOpnds(Opnd * longOpnd, Opnd*& newOp1, Opnd*& newOp2)
{
    if (!isI8Type(longOpnd->getType())) {
        // Nothing to split.
        newOp1 = longOpnd;
        newOp2 = NULL;
        return;
    }
    // Reuse a previously created pair when available (single map lookup).
    OPND_PAIRS_MAP::iterator cached = m_pairs->find(longOpnd);
    if (cached != m_pairs->end()) {
        newOp1 = cached->second[0];
        newOp2 = cached->second[1];
        return;
    }
    TypeManager& typeManager = irManager->getTypeManager();
    if (longOpnd->isPlacedIn(OpndKind_Memory)) {
        // Memory operand: the halves alias the inner dwords of the original.
        newOp1 = irManager->newOpnd(typeManager.getUInt32Type());
        newOp2 = irManager->newOpnd(typeManager.getInt32Type());
        Opnd * halves[2] = { newOp1, newOp2 };
        irManager->assignInnerMemOpnds(longOpnd, halves, 2);
    } else if (longOpnd->isPlacedIn(OpndKind_Imm)) {
        // Immediate: split the constant into its low/high 32-bit words.
        newOp1 = irManager->newImmOpnd(typeManager.getUInt32Type(), (U_32)longOpnd->getImmValue());
        newOp2 = irManager->newImmOpnd(typeManager.getInt32Type(), (I_32)(longOpnd->getImmValue()>>32));
    } else {
        // Register/virtual operand: just allocate two fresh 32-bit opnds.
        newOp1 = irManager->newOpnd(typeManager.getUInt32Type());
        newOp2 = irManager->newOpnd(typeManager.getInt32Type());
    }
    // Cache the pair for all later references to this operand.
    Opnd** pair = new(irManager->getMemoryManager()) Opnd*[2];
    pair[0] = newOp1;
    pair[1] = newOp2;
    (*m_pairs)[longOpnd] = pair;
}
/**
 * Lowers a 64-bit integer multiplication: drops it entirely when one
 * factor is an immediate zero, expands it inline (default), or calls
 * the "imul64" internal runtime helper when the "inline_mul64" option
 * is turned off.
 */
void I8Lowerer::lowerMul64(Inst* inst)
{
    // Pseudo-inst layout: opnd 0 = def (result), opnds 1,2 = uses (factors).
    assert(inst->getOpndRoles(0) & Inst::OpndRole_Def);
    assert(inst->getOpndRoles(1) & Inst::OpndRole_Use);
    assert(inst->getOpndRoles(2) & Inst::OpndRole_Use);
    Opnd* dst  = inst->getOpnd(0);
    Opnd* src1 = inst->getOpnd(1);
    Opnd* src2 = inst->getOpnd(2);
    assert(dst && src1 && src2);
    // A multiplication with a known zero factor is simply dropped.
    // NOTE(review): dst receives no definition on this path — presumably
    // such instructions are dead or folded upstream; confirm before
    // relying on dst's value after a dropped multiply.
    if (isZeroImm(src1) || isZeroImm(src2)) {
        inst->unlink();
        return;
    }
    bool inline_mul64 = true;
    getArg("inline_mul64", inline_mul64);
    if (inline_mul64) {
        inlineMul64(inst);
        return;
    }
    // Out-of-line version: delegate to the runtime helper and lower the
    // resulting call's long operands as usual.
    Opnd* helperArgs[2] = { src1, src2 };
    CallInst* helperCall =
        irManager->newInternalRuntimeHelperCallInst("imul64", 2, helperArgs, dst);
    helperCall->insertBefore(inst);
    processOpnds(helperCall);
    inst->unlink();
}
/**
 * Expands a 64-bit integer multiplication inline into 32-bit MULs/ADDs.
 *
 * With a = a_lo + a_hi*2^32 and b = b_lo + b_hi*2^32, the product reduces
 * to a_lo*b_lo + (a_hi*b_lo + a_lo*b_hi)*2^32 — all higher terms overflow
 * out of 64 bits (see the strategy comment below).
 *
 * When the "inline_mul64_checks" option is on, a small sub-CFG is built
 * that tests both high words for zero and takes a single 32x32 MUL on the
 * common small-operand path; otherwise the straight-line 3-multiplication
 * sequence is emitted before 'inst'.
 *
 * NOTE(review): on the straight-line path 'inst' is not unlinked here —
 * presumably the caller removes the pseudo-inst; confirm against the
 * dispatch code outside this view.
 */
void I8Lowerer::inlineMul64(Inst* inst)
{
// Pseudo-inst layout: opnd 0 = def (result), opnds 1,2 = uses (factors).
assert(inst->getOpndRoles(0) & Inst::OpndRole_Def);
assert(inst->getOpndRoles(1) & Inst::OpndRole_Use);
assert(inst->getOpndRoles(2) & Inst::OpndRole_Use);
Opnd* dst = inst->getOpnd(0);
Opnd* src1 = inst->getOpnd(1);
Opnd* src2 = inst->getOpnd(2);
bool inline_mul64_checks = false;
getArg("inline_mul64_checks", inline_mul64_checks);
// Split result and both factors into 32-bit lo/hi halves.
Opnd * dst_1 = NULL, * dst_2 = NULL;
Opnd * src1_1 = NULL, * src1_2 = NULL;
Opnd * src2_1 = NULL, * src2_2 = NULL;
prepareNewOpnds(dst, dst_1, dst_2);
prepareNewOpnds(src1, src1_1, src1_2);
prepareNewOpnds(src2, src2_1, src2_2);
TypeManager& tm = irManager->getTypeManager();
Type* int32type = tm.getInt32Type();
// Name them eax, ecx, edx here, in code only, to refer same regs as in
// the comments, but let the register allocator to decide the proper
// ones.
Opnd*eax;
Opnd*edx;
Opnd*ecx;
eax = irManager->newOpnd(int32type);
edx = irManager->newOpnd(int32type);
ecx = irManager->newOpnd(int32type);
Opnd* zero = irManager->newImmOpnd(int32type, 0);
//
// Aliases matching the algebra in the strategy comment below.
Opnd* a_lo = src1_1;
Opnd* a_hi = src1_2;
Opnd* b_lo = src2_1;
Opnd* b_hi = src2_2;
Opnd* res_lo = dst_1;
Opnd* res_hi = dst_2;
/*
Strategy:
result = a*b ;
a = a_lo + a_hi*(1<<32)
b = b_lo + b_hi*(1<<32)
- a*b = (a_lo + a_hi*(1<<32))*(b_lo + b_hi*(1<<32)) =
a_lo*b_lo + a_hi*(1<<32)*b_lo + a_lo*b_hi*(1<<32) + a_hi*(1<<32)*b_hi*(1<<32)
- a_hi*(1<<32)*b_hi*(1<<32) - can be dropped off, it's overflow
- with any of `a_hi*b_lo*(1<<32)` + `a_lo*b_hi*(1<<32)` we can ignore high 32 bits
of result (EDX) - these are overflow again
So, we're getting:
a_lo*b_lo + a_hi*b_lo*(1<<32) + a_lo*b_hi*(1<<32)
(1) (2) (3)
*/
//
if (inline_mul64_checks) {
// Build a sub-CFG: test both hi words; if both are zero a single
// 32x32 MUL suffices, otherwise run the full long multiplication.
// NOTE(review): in the newInst(Mnemonic_MUL, 2, ...) calls below the
// leading integer apparently counts the defs (edx:eax) — confirm
// against the newInst API.
newSubGFG();
Node* entryNode = getSubCfgEntryNode();
Node* longMulNode = newBB();
Node* storeResultNode = newBB();
setCurrentNode(NULL);
Node* test_A_Node = newBB();
Node* test_B_Node = newBB();
Node* simpleMulNode = newBB();
setCurrentNode(NULL);
connectNodes(entryNode, test_A_Node);
setCurrentNode(test_A_Node);
//
// test_A_Node:
//
/* cmp a_hi, 0*/ newInst(Mnemonic_CMP, a_hi, zero);
/* jnz longMul */ newBranch(Mnemonic_JNZ, longMulNode, test_B_Node);
setCurrentNode(NULL);
// Poison the finished node pointer so accidental reuse is obvious.
test_A_Node = (Node*)0xDEADBEEF;
//
// test_B_Node:
//
setCurrentNode(test_B_Node);
/* cmp b_hi, 0*/ newInst(Mnemonic_CMP, b_hi, zero);
/* jnz longMul */ newBranch(Mnemonic_JNZ, longMulNode, simpleMulNode);
setCurrentNode(NULL);
test_B_Node = (Node*)0xDEADBEEF;
//
// simpleMulNode:
//
// Both hi words are zero: one 32x32->64 multiply gives the answer.
setCurrentNode(simpleMulNode);
/* mov eax, a_lo */ newInst(Mnemonic_MOV, eax, a_lo);
/* imul b_lo */ newInst(Mnemonic_MUL, 2, edx, eax, eax, b_lo);
connectNodeTo(storeResultNode);
setCurrentNode(NULL);
simpleMulNode = (Node*)0xDEADBEEF;
entryNode = (Node*)0xDEADBEEF;
// Full path: res = lo32(a_lo*b_hi + a_hi*b_lo)<<32 + a_lo*b_lo.
setCurrentNode(longMulNode);
/* mov eax, a_lo*/ newInst(Mnemonic_MOV, eax, a_lo);
/* mul b_hi */ newInst(Mnemonic_MUL, 2, edx, eax, eax, b_hi);
//
// Now, EAX=a_lo*b_hi, EDX content is dropped
//
// save a_lo*b_hi(EAX)
/* mov ecx, eax */ newInst(Mnemonic_MOV, ecx, eax);
/* mov eax, a_hi*/ newInst(Mnemonic_MOV, eax, a_hi);
/* mul b_lo */ newInst(Mnemonic_MUL, 2, edx, eax, eax, b_lo);
/* add ecx, eax */ newInst(Mnemonic_ADD, 1, ecx, ecx, eax);
/* mov eax, a_lo */ newInst(Mnemonic_MOV, eax, a_lo);
/* mul b_lo */ newInst(Mnemonic_MUL, 2, edx, eax, eax, b_lo);
/* add edx, ecx */ newInst(Mnemonic_ADD, 1, edx, edx, ecx);
connectNodes(longMulNode, storeResultNode);
setCurrentNode(NULL);
longMulNode = (Node*)0xDEADBEEF;
//
// storeResultNode:
//
// Common join: both paths leave the product in edx:eax.
setCurrentNode(storeResultNode);
/* mov res_lo, eax*/ newInst(Mnemonic_MOV, res_lo, eax);
/* mov res_hi, edx*/ newInst(Mnemonic_MOV, res_hi, edx);
setCurrentNode(NULL);
connectNodes(storeResultNode, getSubCfgReturnNode());
propagateSubCFG(inst);
} else {
// Straight-line version of the long path, emitted before 'inst'.
/* mov eax, a_lo */ irManager->newCopyPseudoInst(Mnemonic_MOV, eax, a_lo)->insertBefore(inst);
/* mul b_hi */ irManager->newInstEx(Mnemonic_MUL, 2, edx, eax, eax, b_hi, NULL)->insertBefore(inst);
// Now, EAX=low32bit(a_lo*b_hi), EDX content is dropped
/* mov ecx, eax */ irManager->newCopyPseudoInst(Mnemonic_MOV, ecx, eax)->insertBefore(inst);
// Now, a_lo*b_hi is saved to ECX
/* mov eax, a_hi*/ irManager->newCopyPseudoInst(Mnemonic_MOV, eax, a_hi)->insertBefore(inst);
/* mul b_lo */ irManager->newInstEx(Mnemonic_MUL, 2, edx, eax, eax, b_lo, NULL)->insertBefore(inst);
// Now, EAX=low32bit(a_hi*b_lo), EDX content is dropped
/* add ecx, eax */ irManager->newInstEx(Mnemonic_ADD, 1, ecx, ecx, eax)->insertBefore(inst);
// Now, ECX=low32bit(a_lo*b_hi+a_hi*b_lo)
/* mov eax, a_lo */ irManager->newCopyPseudoInst(Mnemonic_MOV, eax, a_lo)->insertBefore(inst);
/* mul b_lo */ irManager->newInstEx(Mnemonic_MUL, 2, edx, eax, eax, b_lo, NULL)->insertBefore(inst);
// Now, EAX=low32bit(a_lo*b_lo) and EDX=high32bit(a_lo*b_lo)
/* add edx, ecx */ irManager->newInstEx(Mnemonic_ADD, 1, edx, edx, ecx)->insertBefore(inst);
// Now, EDX=low32bit(a_lo*b_hi+a_hi*b_lo)+high32bit(a_lo*b_lo)
/* mov res_lo, eax*/ irManager->newCopyPseudoInst(Mnemonic_MOV, res_lo, eax)->insertBefore(inst);
/* mov res_hi, edx*/ irManager->newCopyPseudoInst(Mnemonic_MOV, res_hi, edx)->insertBefore(inst);
}
}
/**
 * Lowers a 64-bit integer division: expands it inline (default) or calls
 * the "idiv64" internal runtime helper when the "inline_div64" option is
 * turned off.
 */
void I8Lowerer::lowerDiv64(Inst* inst)
{
    // Pseudo-inst layout: opnd 0 = def (quotient), opnds 1,2 = uses.
    assert(inst->getOpndRoles(0) & Inst::OpndRole_Def);
    assert(inst->getOpndRoles(1) & Inst::OpndRole_Use);
    assert(inst->getOpndRoles(2) & Inst::OpndRole_Use);
    Opnd* dst  = inst->getOpnd(0);
    Opnd* src1 = inst->getOpnd(1);
    Opnd* src2 = inst->getOpnd(2);
    assert(dst && src1 && src2);
    bool inline_div64 = true;
    getArg("inline_div64", inline_div64);
    if (inline_div64) {
        // false => the expansion stores the quotient into dst.
        inlineDivRem64(false, inst);
        return;
    }
    // Out-of-line version: delegate to the runtime helper and lower the
    // resulting call's long operands as usual.
    Opnd* helperArgs[2] = { src1, src2 };
    CallInst* helperCall =
        irManager->newInternalRuntimeHelperCallInst("idiv64", 2, helperArgs, dst);
    helperCall->insertBefore(inst);
    processOpnds(helperCall);
    inst->unlink();
}
/**
 * Lowers a 64-bit integer remainder: expands it inline (default) or calls
 * the "irem64" internal runtime helper when the "inline_rem64" option is
 * turned off.
 */
void I8Lowerer::lowerRem64(Inst* inst)
{
    // Pseudo-inst layout: opnd 0 = def (remainder), opnds 1,2 = uses.
    assert(inst->getOpndRoles(0) & Inst::OpndRole_Def);
    assert(inst->getOpndRoles(1) & Inst::OpndRole_Use);
    assert(inst->getOpndRoles(2) & Inst::OpndRole_Use);
    Opnd* dst  = inst->getOpnd(0);
    Opnd* src1 = inst->getOpnd(1);
    Opnd* src2 = inst->getOpnd(2);
    assert(dst && src1 && src2);
    bool inline_rem64 = true;
    getArg("inline_rem64", inline_rem64);
    if (inline_rem64) {
        // true => the expansion stores the remainder into dst.
        inlineDivRem64(true, inst);
        return;
    }
    // Out-of-line version: delegate to the runtime helper and lower the
    // resulting call's long operands as usual.
    Opnd* helperArgs[2] = { src1, src2 };
    CallInst* helperCall =
        irManager->newInternalRuntimeHelperCallInst("irem64", 2, helperArgs, dst);
    helperCall->insertBefore(inst);
    processOpnds(helperCall);
    inst->unlink();
}
/**
 * Expands a 64-bit signed division/remainder inline as a sub-CFG spliced
 * in place of 'inst'.
 *
 * @param wantReminder  true  - store the remainder into dst (REM);
 *                      false - store the quotient into dst (DIV)
 * @param inst          the I8 pseudo-inst (opnd0=dst, opnd1=dividend,
 *                      opnd2=divisor) being replaced
 *
 * Strategy (see the comment at the entry node below): record the signs,
 * negate to unsigned, run the unsigned division (three paths: divisor
 * fits in 32 bits with one or two DIVs, or the shift-and-correct path for
 * divisors >= 2^32), then negate the results back per the recorded signs.
 * Both quotient and remainder are always computed; the lo/hi halves are
 * handed to propagateDivRemResults() for possible reuse by other div/rem
 * instructions over the same operand pair.
 */
void I8Lowerer::inlineDivRem64(bool wantReminder, Inst* inst)
{
// Pseudo-inst layout: opnd 0 = def, opnds 1,2 = uses.
assert(inst->getOpndRoles(0) & Inst::OpndRole_Def);
assert(inst->getOpndRoles(1) & Inst::OpndRole_Use);
assert(inst->getOpndRoles(2) & Inst::OpndRole_Use);
Opnd* dst = inst->getOpnd(0);
Opnd* src1 = inst->getOpnd(1);
Opnd* src2 = inst->getOpnd(2);
// Split result, dividend (a) and divisor (b) into 32-bit halves.
Opnd * res_lo = NULL, * res_hi = NULL;
Opnd * a_lo = NULL, * a_hi = NULL;
Opnd * b_lo = NULL, * b_hi = NULL;
prepareNewOpnds(dst, res_lo, res_hi);
prepareNewOpnds(src1, a_lo, a_hi);
prepareNewOpnds(src2, b_lo, b_hi);
//
// Pseudo-registers named after the real registers used in the asm
// comments below; the register allocator picks the actual ones.
TypeManager& tm = irManager->getTypeManager();
Type* int32type = tm.getInt32Type();
Opnd* edx = irManager->newOpnd(int32type);
Opnd* eax = irManager->newOpnd(int32type);
Opnd* ecx = irManager->newOpnd(int32type);
Opnd* ebx = irManager->newOpnd(int32type);
Opnd* edi = irManager->newOpnd(int32type);
Opnd* esi = irManager->newOpnd(int32type);
Opnd* temp_lo = irManager->newOpnd(int32type);
Opnd* temp_hi = irManager->newOpnd(int32type);
Opnd* zero = irManager->newImmOpnd(int32type, 0);
Opnd* one = irManager->newImmOpnd(int32type, 1);
//
newSubGFG();
Node* entryNode = getSubCfgEntryNode();
Node* check_A_SignNode = newBB();
Node* neg_A_Node = newBB();
Node* check_B_SignNode = newBB();
Node* neg_B_Node = newBB();
Node* testBigDvsrNode = newBB();
Node* testOneDivNode = newBB();
Node* simpleDivNode = newBB();
Node* oneDivNode = newBB();
Node* bigDivisorNode = newBB();
Node* storeResultNode = newBB();
//
// Here we go.
// The algorithm is based on unsigned div algo from assembly gems page
// http://www.df.lth.se/~john_e/gems/gem002a.html (also published in AMD
// optimization manual) and modified for signed arithmetics and for
// the Jitrino CG specifics.
//
//
// entryNode:
//
// Preparation - load all into 'registers'
//
Opnd* fixQuotSign = irManager->newOpnd(int32type);
Opnd* fixRemSign = irManager->newOpnd(int32type);
// Sign presumed positive
setCurrentNode(entryNode);
/* mov fixQuotSign, 0*/ newInst(Mnemonic_MOV, fixQuotSign, zero);
/* mov fixRemSign, 0*/ newInst(Mnemonic_MOV, fixRemSign, zero);
/* mov edx, a_hi*/ newInst(Mnemonic_MOV, edx, a_hi);
/* mov eax, a_lo*/ newInst(Mnemonic_MOV, eax, a_lo);
/* mov ecx, b_hi*/ newInst(Mnemonic_MOV, ecx, b_hi);
/* mov ebx, b_lo*/ newInst(Mnemonic_MOV, ebx, b_lo);
connectNodeTo(check_A_SignNode);
setCurrentNode(NULL);
// Finished node pointers are poisoned so accidental reuse is obvious.
entryNode = (Node*)0xDEADBEEF;
//
// check_A_SignNode:
//
// Test for signs and convert to unsigned when needed
//
setCurrentNode(check_A_SignNode);
/* test edx, edx*/ newInst(Mnemonic_TEST, edx, edx);
/* jns check_B_Sign*/ newBranch(Mnemonic_JNS, check_B_SignNode, neg_A_Node);
setCurrentNode(NULL);
check_A_SignNode = (Node*)0xDEADBEEF;
//
// neg_A:
//
// Dividend is negative: remember to flip both results' signs and
// negate the 64-bit value (neg lo; adc hi,0; neg hi).
//
setCurrentNode(neg_A_Node);
/* mov fixQuotSign, 1*/ newInst(Mnemonic_MOV, fixQuotSign, one);
/* mov fixRemSign, 1*/ newInst(Mnemonic_MOV, fixRemSign, one);
/* neg eax */ newInst(Mnemonic_NEG, 1, eax, eax);
/* adc edx, 0 */ newInst(Mnemonic_ADC, 1, edx, edx, zero);
/* neg edx */ newInst(Mnemonic_NEG, 1, edx, edx);
connectNodeTo(check_B_SignNode);
setCurrentNode(NULL);
neg_A_Node = (Node*)0xDEADBEEF;
//
// check_B_Sign_Node:
//
setCurrentNode(check_B_SignNode);
/* test ecx, ecx*/ newInst(Mnemonic_TEST, ecx, ecx);
/* jns testBigDvsrNode*/ newBranch(Mnemonic_JNS, testBigDvsrNode, neg_B_Node);
setCurrentNode(NULL);
check_B_SignNode = (Node*)0xDEADBEEF;
//
// neg_B_Node:
//
// When doing REM, the result's sign only depends on
// dividend's sign no need to change fixRemSign
setCurrentNode(neg_B_Node);
/* XOR fixQuotSign, 1*/ newInst(Mnemonic_XOR, 1, fixQuotSign, fixQuotSign, one);
/* neg ebx */ newInst(Mnemonic_NEG, 1, ebx, ebx);
/* adc ecx, 0 */ newInst(Mnemonic_ADC, 1, ecx, ecx, zero);
/* neg ecx */ newInst(Mnemonic_NEG, 1, ecx, ecx);
connectNodeTo(testBigDvsrNode);
setCurrentNode(NULL);
neg_B_Node = (Node*)0xDEADBEEF;
//
// testBigDvsr:
//
// divisor > 2^32-1 ?
setCurrentNode(testBigDvsrNode);
/* cmp ecx, 0 */ newInst(Mnemonic_CMP, ecx, zero);
// YES - proceed to the hard way.
// NO - fall to the simpler path
/* jnz BIG_DVSOR*/ newBranch(Mnemonic_JNZ, bigDivisorNode, testOneDivNode);
setCurrentNode(NULL);
testBigDvsrNode = (Node*)0xDEADBEEF;
//
// testOneDivNode:
//
//
//only one division needed ? (ecx = 0)
setCurrentNode(testOneDivNode);
/* cmp edx, ebx */ newInst(Mnemonic_CMP, edx, ebx);
// YES - one division sufficient
/* jb ONE_DIV */ newBranch(Mnemonic_JB, oneDivNode, simpleDivNode);
setCurrentNode(NULL);
testOneDivNode = (Node*)0xDEADBEEF;
//
// simpleDivNode:
//
// dividend-hi >= divisor: divide hi word first to get quotient-hi,
// then fall into the one-division path for the low word.
setCurrentNode(simpleDivNode);
/* mov ecx, eax */ newInst(Mnemonic_MOV, ecx, eax); // save dividend-lo in ecx
/* mov eax, edx */ newInst(Mnemonic_MOV, eax, edx); // get dividend-hi
/* xor edx, edx */ newInst(Mnemonic_MOV, edx, zero); // zero extend it into edx:eax
/* div ebx */ newInst(Mnemonic_DIV, 2, edx, eax, edx, eax, ebx); // quotient-hi in eax
/* xchg eax, ecx*/ newInst(Mnemonic_XCHG, eax, ecx); //ecx = quotient-hi, eax =dividend-lo
connectNodeTo(oneDivNode);
setCurrentNode(NULL);
simpleDivNode = (Node*)0xDEADBEEF;
//
// oneDivNode:
//
setCurrentNode(oneDivNode);
/* div ebx */ newInst(Mnemonic_DIV, 2, edx, eax, edx, eax, ebx); // eax = quotient-lo
/* mov ebx, edx */ newInst(Mnemonic_MOV, ebx, edx); // ebx = remainder-lo
/* mov edx, ecx */ newInst(Mnemonic_MOV, edx, ecx); // edx = quotient-hi(quotient in edx:eax)
/* xor ecx, ecx */ newInst(Mnemonic_MOV, ecx, zero); // ecx = remainder-hi (rem. in ecx:ebx)
/* jmp saveResult*/
connectNodeTo(storeResultNode);
setCurrentNode(NULL);
oneDivNode = (Node*)0xDEADBEEF;
//
// bigDivisorNode:
//
// Divisor >= 2^32: scale divisor and dividend down until the divisor
// fits 32 bits, divide, then multiply back and correct the results.
setCurrentNode(bigDivisorNode);
/* mov temp_hi, edx */ newInst(Mnemonic_MOV, temp_hi, edx);
/* mov temp_lo, eax */ newInst(Mnemonic_MOV, temp_lo, eax); // save dividend
/* mov esi, ebx */ newInst(Mnemonic_MOV, esi, ebx);
/* mov edi, ecx */ newInst(Mnemonic_MOV, edi, ecx);
// ^^^divisor now in edi:ebx and ecx:esi
// shift both divisor and dividend right on 1 bit
/* shr edx, 1 */ newInst(Mnemonic_SHR, edx, one);
/* rcr eax, 1 */ newInst(Mnemonic_RCR, eax, one);
/* ror edi, 1 */ newInst(Mnemonic_ROR, edi, one);
/* rcr ebx, 1 */ newInst(Mnemonic_RCR, ebx, one);
//
// FIXME: workarounding WebMaker problem, which does not like
// both DEF and USE of the same arg in the same instruction.
// Replace, 'reg = BSR reg, 0' with
// temp = reg ; reg = BSR temp, 0
// Funny thing is that this only happens with BSR.
//
#if 0 // _FIXME_
/* bsr ecx, ecx */ newInst(Mnemonic_BSR, 1, ecx, ecx);
#else
Opnd* tmp = irManager->newOpnd(ecx->getType());
/* mov tmp, ecx */ newInst(Mnemonic_MOV, 1, tmp, ecx);
/* bsr ecx, tmp */ newInst(Mnemonic_BSR, 1, ecx, tmp);
#endif
// ecx = number of remaining shifts
// scale divisor and dividend down, so divisor fits
// into EBX (<2^32)
// FIXME: Currently, constraint resolver fails to assign 'ecx' operand to the
// real ECX, and this causes SpillGen failure.
// Forcing the ECX right here:
#if 0 //_FIXME_
/* shrd ebx, edi, CL*/ newInst(Mnemonic_SHRD, ebx, edi, ecx);
/* shrd eax, edx, CL*/ newInst(Mnemonic_SHRD, eax, edx, ecx);
/* shr edx, CL */ newInst(Mnemonic_SHR, edx, ecx);
#else
Opnd* trueECX = irManager->getRegOpnd(RegName_ECX);
newInst(Mnemonic_MOV, trueECX, ecx);
//~FIXME
/* shrd ebx, edi, CL*/ newInst(Mnemonic_SHRD, ebx, edi, trueECX);
/* shrd eax, edx, CL*/ newInst(Mnemonic_SHRD, eax, edx, trueECX);
/* shr edx, CL */ newInst(Mnemonic_SHR, edx, trueECX);
#endif
/* rol edi, 1 */ newInst(Mnemonic_ROL, edi, one);
// get quotient
/* div ebx */ newInst(Mnemonic_DIV, 2, edx, eax, edx, eax, ebx, NULL);
/* mov ebx, temp_lo*/ newInst(Mnemonic_MOV, ebx, temp_lo);
/* mov ecx, eax */ newInst(Mnemonic_MOV, ecx, eax);
/* imul edi, eax */ newInst(Mnemonic_IMUL, edi, eax);
/* mul esi */ newInst(Mnemonic_MUL, 2, edx, eax, eax, esi, NULL);
/* add edx, edi */ newInst(Mnemonic_ADD, 1, edx, edx, edi); // edx:eax = quotient * divisor
/* sub ebx, eax */ newInst(Mnemonic_SUB, 1, ebx, ebx, eax); // dividend-lo - (quot.*divisor)-lo
/* mov eax, ecx */ newInst(Mnemonic_MOV, eax, ecx);
/* mov ecx, temp_hi*/ newInst(Mnemonic_MOV, ecx, temp_hi); // restore dividend hi-word
/* sbb ecx, edx */ newInst(Mnemonic_SBB, 1, ecx, ecx, edx); // subtract divisor * quot. from dividend
/* sbb edx, edx */ newInst(Mnemonic_SBB, 1, edx, edx, edx); // 0 if remainder > 0, else FFFFFFFFh
/* and esi, edx */ newInst(Mnemonic_AND, 1, esi, esi, edx); // nothing to add
/* and edi, edx */ newInst(Mnemonic_AND, 1, edi, edi, edx); // back if remainder positive
/* add ebx, esi */ newInst(Mnemonic_ADD, 1, ebx, ebx, esi); // correct remainder
/* adc ecx, edi */ newInst(Mnemonic_ADC, 1, ecx, ecx, edi); // and quotient if
/* add eax, edx */ newInst(Mnemonic_ADD, 1, eax, eax, edx); // necessary
/* xor edx, edx */ newInst(Mnemonic_MOV, edx, zero); // clear hi-word of quot (eax<=FFFFFFFFh)
connectNodeTo(storeResultNode);
setCurrentNode(NULL);
bigDivisorNode = (Node*)0xDEADBEEF;
// Sign fixup and result store nodes.
Node* testFixQuotSignNode = newBB();
Node* fixQuotSignNode = newBB();
Node* testFixRemSignNode = newBB();
Node* fixRemSignNode = newBB();
Node* finitaNode = newBB();
//
// storeResultNode:
//
// Join point: quotient is in edx:eax, remainder in ecx:ebx on all
// three paths. Snapshot them into dedicated operands.
Opnd* quot_lo = irManager->newOpnd(int32type);
Opnd* quot_hi = irManager->newOpnd(int32type);
Opnd* rem_lo = irManager->newOpnd(int32type);
Opnd* rem_hi = irManager->newOpnd(int32type);
setCurrentNode(storeResultNode);
/* mov rem_lo, ebx*/ newInst(Mnemonic_MOV, rem_lo, ebx);
/* mov rem_hi, ecx*/ newInst(Mnemonic_MOV, rem_hi, ecx);
/* mov quot_lo, eax*/ newInst(Mnemonic_MOV, quot_lo, eax);
/* mov quot_hi, edx*/ newInst(Mnemonic_MOV, quot_hi, edx);
connectNodeTo(testFixQuotSignNode);
setCurrentNode(NULL);
storeResultNode = (Node*)0xDEADBEEF;
//
// testFixQuotSignNode:
//
//
setCurrentNode(testFixQuotSignNode);
/* cmp fixQuotSign, 0 */ newInst(Mnemonic_CMP, fixQuotSign, zero);
/* jz testFixRemSignNode */ newBranch(Mnemonic_JZ, testFixRemSignNode, fixQuotSignNode);
setCurrentNode(NULL);
testFixQuotSignNode = (Node*)0xDEADBEEF;
//
// fixQuotSignNode:
//
// 64-bit negate of the quotient (neg lo; adc hi,0; neg hi).
setCurrentNode(fixQuotSignNode);
/* neg res_lo */ newInst(Mnemonic_NEG, 1, quot_lo, quot_lo);
/* adc res_hi, 0 */ newInst(Mnemonic_ADC, 1, quot_hi, quot_hi, zero);
/* neg res_lo */ newInst(Mnemonic_NEG, 1, quot_hi, quot_hi);
connectNodeTo(testFixRemSignNode);
setCurrentNode(NULL);
fixQuotSignNode = (Node*)0xDEADBEEF;
//
// testFixRemSignNode:
//
//
setCurrentNode(testFixRemSignNode);
/* cmp fixRemSign, 0 */ newInst(Mnemonic_CMP, fixRemSign, zero);
/* jz finitaNode */ newBranch(Mnemonic_JZ, finitaNode, fixRemSignNode);
setCurrentNode(NULL);
// NOTE(review): poisons testFixQuotSignNode a second time;
// testFixRemSignNode was presumably intended. Harmless — dead store on
// a local debug marker.
testFixQuotSignNode = (Node*)0xDEADBEEF;
//
// fixRemSignNode:
//
// 64-bit negate of the remainder.
setCurrentNode(fixRemSignNode);
/* neg res_lo */ newInst(Mnemonic_NEG, 1, rem_lo, rem_lo);
/* adc res_hi, 0 */ newInst(Mnemonic_ADC, 1, rem_hi, rem_hi, zero);
/* neg res_lo */ newInst(Mnemonic_NEG, 1, rem_hi, rem_hi);
connectNodeTo(finitaNode);
setCurrentNode(NULL);
// NOTE(review): poisons fixQuotSignNode a second time; fixRemSignNode
// was presumably intended. Harmless — dead store on a local debug
// marker.
fixQuotSignNode = (Node*)0xDEADBEEF;
//
// finitaNode:
//
// Store whichever result the caller asked for into the destination.
setCurrentNode(finitaNode);
if (wantReminder) {
/* mov res_lo, rem_lo*/ newInst(Mnemonic_MOV, res_lo, rem_lo);
/* mov res_hi, rem_hi*/ newInst(Mnemonic_MOV, res_hi, rem_hi);
}
else {
/* mov res_lo, quot_lo*/ newInst(Mnemonic_MOV, res_lo, quot_lo);
/* mov res_hi, quot_hi*/ newInst(Mnemonic_MOV, res_hi, quot_hi);
}
connectNodes(finitaNode, getSubCfgReturnNode());
setCurrentNode(NULL);
finitaNode = (Node*)0xDEADBEEF;
// Remember where the original instruction lived before splicing — the
// node is needed for dominance checks during propagation.
Node* originalInstNode = inst->getNode();
propagateSubCFG(inst);
propagateDivRemResults(inst, originalInstNode, quot_lo, quot_hi, rem_lo, rem_hi);
}
/**
 * After a div/rem over (src1, src2) has been expanded, scans the list of
 * collected I8 pseudo-instructions for other IDIV/REM pseudo-insts over
 * the very same operand pair and, when legal, replaces them with copies
 * of the already computed quotient/remainder halves.
 *
 * Propagation to a candidate requires:
 *  - both source operands are single-def (trivial value-identity check);
 *  - the candidate uses exactly the same src1/src2 operands;
 *  - both instructions belong to the same loop (per m_pLoopInfos);
 *  - the original's node dominates the candidate's node.
 *
 * Replaced candidates are unlinked and NULLed out in *m_pI8Insts.
 * Disabled by default via the "propagate_div_rem" option (see FIXME).
 */
void I8Lowerer::propagateDivRemResults(
    Inst* origInst, Node* originalInstNode,
    Opnd* quot_lo, Opnd* quot_hi,
    Opnd* rem_lo, Opnd* rem_hi)
{
    bool propagate_div_rem = false; //FIXME: somehow crashes on Linux/IA-32 in dominators - need to fix.
    getArg("propagate_div_rem", propagate_div_rem);
    if (!propagate_div_rem) {
        return;
    }
    assert(origInst->getOpndRoles(0) & Inst::OpndRole_Def);
    assert(origInst->getOpndRoles(1) & Inst::OpndRole_Use);
    assert(origInst->getOpndRoles(2) & Inst::OpndRole_Use);
    Opnd* src1 = origInst->getOpnd(1);
    Opnd* src2 = origInst->getOpnd(2);
    if (!isSingleDef(src1) || !isSingleDef(src2)) {
        // Not a single def operands - can't propagate with current
        // trivial implementation, need more sophisticated routine.
        return;
    }
    ControlFlowGraph* cfg = irManager->getFlowGraph();
    DominatorTree* dt = cfg->getDominatorTree();
    INST_ARRAY& i8insts = *m_pI8Insts;
    for (INST_ARRAY::iterator i=i8insts.begin(); i != i8insts.end(); i++) {
        Inst* inst = *i;
        // NULL entries are previously propagated-away instructions.
        if (inst == origInst || inst == NULL) {
            continue;
        }
        if (!inst->hasKind(Inst::Kind_I8PseudoInst) ||
            inst->getMnemonic() != Mnemonic_IDIV) {
            continue;
        }
        assert(inst->getOpndRoles(0) & Inst::OpndRole_Def);
        assert(inst->getOpndRoles(1) & Inst::OpndRole_Use);
        assert(inst->getOpndRoles(2) & Inst::OpndRole_Use);
        // BUGFIX: read the candidate's operands from 'inst', not from
        // 'origInst' — the old code compared origInst's operands against
        // themselves, so the check always passed and propagation could
        // hit div/rem instructions over unrelated operands.
        Opnd* otherSrc1 = inst->getOpnd(1);
        Opnd* otherSrc2 = inst->getOpnd(2);
        if (otherSrc1 != src1 || otherSrc2 != src2) {
            continue;
        }
        //
        // Test whether both instructions belong to the same loop
        //
        Node* origNodeLoopHdr = (*m_pLoopInfos)[origInst];
        Node* instLoopHdr = (*m_pLoopInfos)[inst];
        if (origNodeLoopHdr != instLoopHdr) {
            continue;
        }
        //
        // Test whether one instruction dominates another
        //
        Node* instNode = inst->getNode();
        if (!dt->dominates(originalInstNode, instNode)) {
            continue;
        }
        Log::out() << "Propagating: I" << origInst->getId() << " => I" << inst->getId() << std::endl;
        // Replace the candidate with two 32-bit copies of the half that
        // matches its kind (remainder vs quotient).
        Opnd* otherDst = inst->getOpnd(0);
        Opnd* otherDst_hi, *otherDst_lo;
        prepareNewOpnds(otherDst, otherDst_lo, otherDst_hi);
        const bool isRem = isI8RemInst(inst);
        Inst* ii;
        ii = irManager->newCopyPseudoInst(Mnemonic_MOV, otherDst_lo, isRem ? rem_lo : quot_lo);
        ii->insertBefore(inst);
        ii = irManager->newCopyPseudoInst(Mnemonic_MOV, otherDst_hi, isRem ? rem_hi : quot_hi);
        ii->insertBefore(inst);
        inst->unlink();
        // Mark the slot consumed so later passes over the list skip it.
        *i = NULL;
    }
}
// IR verification routine (debug builds only): asserts that no
// I8PseudoInst survived this lowering pass anywhere in the CFG.
static void checkIR(IRManager* irm) {
#ifdef _DEBUG
    const Nodes& allNodes = irm->getFlowGraph()->getNodes();
    for (Nodes::const_iterator nodeIt = allNodes.begin(); nodeIt != allNodes.end(); ++nodeIt) {
        Inst* cur = (Inst*)(*nodeIt)->getFirstInst();
        while (cur != NULL) {
            assert(!cur->hasKind(Inst::Kind_I8PseudoInst));
            cur = cur->getNextInst();
        }
    }
#endif
}
}}