blob: e221ccd788a632fdf9bebb4ac54227ebf4485f03 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Intel, Pavel A. Ozhdikhin
*
*/
#undef STANDALONE_TEST
#include <iostream>
#include <fstream>
#include <float.h>
#include <cstdlib>
#include <algorithm>
#ifdef STANDALONE_TEST
#include "standalonetest2.h"
#else
#include "Opcode.h"
#include "Opnd.h"
#include "Type.h"
#include "Inst.h"
#include "IRBuilder.h"
#include "BitSet.h"
#include "Log.h"
#include "optimizer.h"
#include "simplifier.h"
#include "constantfolder.h"
#include "deadcodeeliminator.h"
#include "optarithmetic.h"
#include "irmanager.h"
#include "CompilationContext.h"
#include <float.h>
#include <math.h>
#include "Stl.h"
#include "simplifier.h"
#include "optarithmetic.h"
namespace Jitrino {
#ifndef DEBUG_MULTIPLYBYCONSTANT
#define DEBUGPRINT(x)
#define DEBUGPRINT2(x,y)
#else
void indent(int indentby) {
for (int i=0; i<indentby; i++) {
Log::out() << " ";
}
}
#define DEBUGPRINT(x) indent(2*depth); Log::out() << x << ::std::endl
#define DEBUGPRINT2(x,y) indent(2*depth); Log::out() << x; y.print(Log::out()); Log::out() << ::std::endl
#endif
#endif
struct MulOp {
enum Op { pushc=0, pushy=1, swap=2, dup=3, shladd=4, add=5, sub=6, neg=7, shiftl=8, dup2, dup3, numMulOps=11 };
};
const int methodLen = 400;
const int stackDepth = 400;
const int numMulOpnds[MulOp::numMulOps] = { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0 };
const int COST_LOADZERO = 0;
const int COST_LOADY = 0;
const int COST_NEG = 1;
const int COST_ADD = 1;
const int COST_SUB = 1;
class MulMethod {
private:
public:
int data[methodLen];
int len;
bool foripf;
int COST_ADDSHIFT_SMALL;
int COST_SHIFT;
int SMALL_SHIFT_MAXBITS;
MulMethod(bool ipf) : len(0), foripf(ipf) {
if (ipf) {
COST_ADDSHIFT_SMALL = 1;
COST_SHIFT = 1;
SMALL_SHIFT_MAXBITS = 4;
} else {
COST_ADDSHIFT_SMALL = 2;
COST_SHIFT = 4;
SMALL_SHIFT_MAXBITS = 3;
}
};
void append(MulOp::Op op0) {
assert((0<=op0)&&(op0<MulOp::numMulOps));
assert(numMulOpnds[op0]==0);
data[len] = op0; len += 1;
};
void append(MulOp::Op op0op, int opnd0) {
int op0 = (int) op0op;
assert((0<=op0)&&(op0<MulOp::numMulOps));
assert(numMulOpnds[op0]==1);
data[len] = op0; data[len+1] = opnd0; len += 2;
};
void makespace(int k) {
assert(len+k <= methodLen);
assert(k>0);
for (int i=len+k; i>k; i--) {
data[i] = data[i-k];
}
len += k;
};
void prepend(MulOp::Op op0) {
assert((0<=op0)&&(op0<MulOp::numMulOps));
assert(numMulOpnds[op0]==0);
makespace(1); data[0] = op0;
};
void prepend(MulOp::Op op0, int opnd0) {
assert((0<=op0)&&(op0<MulOp::numMulOps));
assert(numMulOpnds[op0]==1);
makespace(2); data[0] = op0; data[1] = opnd0;
};
void append(const MulMethod &other) {
assert(len+other.len <= methodLen);
for (int i=0; i < other.len; i++) {
data[len+i] = other.data[i];
}
len += other.len;
}
void prepend(const MulMethod &other) {
makespace(other.len);
for (int i=0; i < other.len; i++) {
data[i] = other.data[i];
}
}
int apply(int y, int *latency = 0) {
int thestack[stackDepth];
int when[stackDepth];
thestack[0] = 0xdeadbeef;
when[0] = -1;
int ip = 0;
int sp = 0;
while (ip < len) {
enum MulOp::Op op = (enum MulOp::Op) data[ip];
ip += 1;
assert(sp < stackDepth);
switch (op) {
case MulOp::pushc: assert(ip<len); thestack[sp] = data[ip++]; when[sp] = 0; sp += 1; break;
case MulOp::pushy: thestack[sp] = y; when[sp] = COST_LOADY; sp += 1; break;
case MulOp::swap: assert(sp >= 2); ::std::swap(thestack[sp-1], thestack[sp-2]); ::std::swap(when[sp-1], when[sp-2]); break;
case MulOp::dup: assert(sp >= 1); thestack[sp] = thestack[sp-1]; when[sp] = when[sp-1]; sp += 1; break;
case MulOp::dup2: assert(sp >= 2); thestack[sp] = thestack[sp-2]; when[sp] = when[sp-2]; sp += 1; break;
case MulOp::dup3: assert(sp >= 2); thestack[sp] = thestack[sp-3]; when[sp] = when[sp-3]; sp += 1; break;
case MulOp::add: assert(sp >= 2);
thestack[sp-2] = thestack[sp-2] + thestack[sp-1];
when[sp-2] = ::std::max(when[sp-2],when[sp-1]) + COST_ADD;
sp -=1;
break;
case MulOp::sub: assert(sp >= 2);
thestack[sp-2] = thestack[sp-2] - thestack[sp-1];
when[sp-2] = ::std::max(when[sp-2],when[sp-1]) + COST_SUB;
sp -=1; break;
case MulOp::shladd: assert(sp >= 2);
assert(ip<len);
assert(data[ip] <= SMALL_SHIFT_MAXBITS);
thestack[sp-2] = (thestack[sp-2] << data[ip++]) + thestack[sp-1];
when[sp-2] = ::std::max(when[sp-2],
when[sp-1]) + COST_ADDSHIFT_SMALL;
sp -=1; break;
case MulOp::neg: assert(sp >= 1);
thestack[sp-1] = -thestack[sp-1];
when[sp-1] = when[sp-1] + COST_NEG;
break;
case MulOp::shiftl: assert(sp >= 1);
assert(ip<len);
thestack[sp-1] = thestack[sp-1] << data[ip++];
when[sp-1] = when[sp-1]+ COST_SHIFT;
break;
default:
assert(0);
}
}
if (sp != 1) {
::std::cerr << ::std::endl;
::std::cerr << "sp != 1 after applying: ";
printOps(::std::cerr); ::std::cerr << ::std::endl;
assert(0);
}
if (latency) { *latency = when[0]; }
return thestack[0];
};
void printOps(::std::ostream &outs) const {
int ip = 0;
while (ip < len) {
enum MulOp::Op op = (enum MulOp::Op) data[ip];
ip += 1;
switch (op) {
case MulOp::pushc: outs << "push " << data[ip++] << "; "; break;
case MulOp::pushy: outs << "push y; "; break;
case MulOp::swap: outs << "swap; "; break;
case MulOp::dup: outs << "dup; "; break;
case MulOp::dup2: outs << "dup2; "; break;
case MulOp::dup3: outs << "dup3; "; break;
case MulOp::add: outs << "add; "; break;
case MulOp::sub: outs << "sub; "; break;
case MulOp::shladd: outs << "shladd " << data[ip++] << "; "; break;
case MulOp::neg: outs << "neg; "; break;
case MulOp::shiftl: outs << "shiftl " << data[ip++] << "; "; break;
default: break;
}
}
}
enum What { IsConstant, IsY, IsSub };
static void printSub(::std::ostream &outs, int onstack, enum What what) {
switch (what) {
case IsConstant: outs << onstack; break;
case IsY: outs << "Y"; break;
case IsSub: outs << "t" << onstack; break;
default: assert(0);
}
}
void print(::std::ostream &outs) const {
int thestack[stackDepth];
What what[stackDepth];
thestack[0] = 0xdeadbeef;
what[0] = IsConstant;
int ip = 0;
int sp = 0;
int subnum = 0;
if (len == 0)
return; // no reduction
while (ip < len) {
enum MulOp::Op op = (enum MulOp::Op) data[ip];
ip += 1;
assert(sp < stackDepth);
switch (op) {
case MulOp::pushc: assert(ip<len); thestack[sp] = data[ip++]; what[sp] = IsConstant; sp += 1; break;
case MulOp::pushy: thestack[sp] = 0; what[sp] = IsY; sp += 1; break;
case MulOp::swap: assert(sp >= 2); ::std::swap(thestack[sp-1], thestack[sp-2]); ::std::swap(what[sp-1], what[sp-2]); break;
case MulOp::dup: assert(sp >= 1); thestack[sp] = thestack[sp-1]; what[sp] = what[sp-1]; sp += 1; break;
case MulOp::dup2: assert(sp >= 2); thestack[sp] = thestack[sp-2]; what[sp] = what[sp-2]; sp += 1; break;
case MulOp::dup3: assert(sp >= 2); thestack[sp] = thestack[sp-3]; what[sp] = what[sp-3]; sp += 1; break;
case MulOp::add:
{
assert(sp >= 2);
int thissub = ++subnum;
outs << "t" << thissub << " = add ";
printSub(outs, thestack[sp-2], what[sp-2]); outs << ", ";
printSub(outs, thestack[sp-1], what[sp-1]); outs << "; ";
thestack[sp-2] = thissub; what[sp-2] = IsSub; sp -=1;
break;
}
case MulOp::sub:
{
assert(sp >= 2);
int thissub = ++subnum;
outs << "t" << thissub << " = sub ";
printSub(outs, thestack[sp-2], what[sp-2]); outs << ", ";
printSub(outs, thestack[sp-1], what[sp-1]); outs << "; ";
thestack[sp-2] = thissub; what[sp-2] = IsSub; sp -=1;
break;
}
case MulOp::shladd:
{
assert(sp >= 2);
assert(ip<len);
assert(data[ip] <= SMALL_SHIFT_MAXBITS);
int thissub = ++subnum;
outs << "t" << thissub << " = shladd ";
printSub(outs, thestack[sp-2], what[sp-2]);
outs << ", " << data[ip++] << ", ";
printSub(outs, thestack[sp-1], what[sp-1]); outs << "; ";
thestack[sp-2] = thissub; what[sp-2] = IsSub; sp -=1;
break;
}
case MulOp::neg:
{
assert(sp >= 1);
int thissub = ++subnum;
outs << "t" << thissub << " = neg ";
printSub(outs, thestack[sp-1], what[sp-1]); outs << "; ";
thestack[sp-1] = thissub; what[sp-1] = IsSub;
break;
}
case MulOp::shiftl:
{
assert(sp >= 1);
assert(ip<len);
int thissub = ++subnum;
outs << "t" << thissub << " = shiftl ";
printSub(outs, thestack[sp-1], what[sp-1]); outs << ", " << data[ip++] << "; ";
thestack[sp-1] = thissub; what[sp-1] = IsSub;
break;
}
default:
assert(0);
}
}
if (sp != 1) {
::std::cerr << ::std::endl;
::std::cerr << "sp != 1 after applying: ";
printOps(::std::cerr); ::std::cerr << ::std::endl;
assert(0);
}
outs << "r = ";
printSub(outs, thestack[sp-1], what[sp-1]);
};
int getCost() {
if (len == 0) {
return 10000;
} else {
int latency;
apply(13, &latency);
return latency;
}
}
#ifndef STANDALONE_TEST
Opnd *genCode(Type *type, Simplifier *simp, Opnd *y)
{
Opnd *thestack[stackDepth];
Type::Tag tag = y->getType()->tag;
bool width32 = ((tag == Type::Int32) || (tag == Type::UInt32));
thestack[0] = 0;
int ip = 0;
int sp = 0;
if (len == 0)
return NULL; // no reduction
while (ip < len) {
enum MulOp::Op op = (enum MulOp::Op) data[ip];
ip += 1;
assert(sp < stackDepth);
switch (op) {
case MulOp::pushc: {
assert(ip<len);
int val = data[ip]; ip += 1;
Opnd *op = (width32
? simp->genLdConstant((I_32)(val))->getDst()
: simp->genLdConstant((int64)(val))->getDst());
thestack[sp] = op;
sp += 1;
} break;
case MulOp::pushy: thestack[sp] = y; sp += 1; break;
case MulOp::swap: assert(sp >= 2); ::std::swap(thestack[sp-1], thestack[sp-2]); break;
case MulOp::dup: assert(sp >= 1); thestack[sp] = thestack[sp-1]; sp += 1; break;
case MulOp::dup2: assert(sp >= 2); thestack[sp] = thestack[sp-2]; sp += 1; break;
case MulOp::dup3: assert(sp >= 2); thestack[sp] = thestack[sp-3]; sp += 1; break;
case MulOp::add: assert(sp >= 2);
thestack[sp-2] = simp->genAdd(type, Modifier(Overflow_None)|Modifier(Exception_Never)|Modifier(Strict_No), thestack[sp-2], thestack[sp-1])->getDst();
sp -=1;
break;
case MulOp::sub: assert(sp >= 2);
thestack[sp-2] = simp->genSub(type, Modifier(Overflow_None)|Modifier(Exception_Never)|Modifier(Strict_No), thestack[sp-2], thestack[sp-1])->getDst();
sp -=1; break;
case MulOp::shladd: {
assert(sp >= 2);
assert(ip<len);
int val = data[ip]; ip += 1;
assert(val <= SMALL_SHIFT_MAXBITS);
Opnd *op = simp->genLdConstant((I_32)(val))->getDst();
thestack[sp-2] = simp->genShladd(type, thestack[sp-2], op, thestack[sp-1])->getDst();
sp -=1; } break;
case MulOp::neg: assert(sp >= 1);
thestack[sp-1] = simp->genNeg(type, thestack[sp-1])->getDst();
break;
case MulOp::shiftl: {
assert(sp >= 1);
assert(ip<len);
int val = data[ip]; ip += 1;
Opnd *op = simp->genLdConstant((I_32)(val))->getDst();
thestack[sp-1] = simp->genShl(type, ShiftMask_None,
thestack[sp-1], op)->getDst();
} break;
default:
assert(0);
}
}
assert(sp == 1);
return thestack[0];
}
#endif // !STANDALONE_TEST
};
template <typename inttype, int width>
bool testMul(MulMethod &method, inttype d, inttype num);
template <typename inttype, int width>
void planMulCompound(MulMethod &m, inttype d, int depth);
template <typename inttype, int width>
void planMulLookup(MulMethod &m, inttype d, int depth);
template <typename inttype, int width>
void planMulFactor(MulMethod &m, inttype d, int depth);
template <typename inttype, int width>
void planMulLarge(MulMethod &m, inttype d, int depth);
template <typename inttype, int width>
void planMulNeg(MulMethod &m, inttype d, int depth);
template <typename inttype, int width>
void planMul(MulMethod &m, inttype d, int depth);
template <typename inttype, int width>
void planMul(MulMethod &m, inttype d, int depth) {
DEBUGPRINT("planMul(" << (int)d << ")");
if (d == -1) {
m.append(MulOp::pushy); m.append(MulOp::neg);
} else if (d == 1) {
m.append(MulOp::pushy);
} else if (d == 0) {
m.append(MulOp::pushc, 0);
} else if (d == -d) {
int k = width - 1;
m.append(MulOp::pushy); m.append(MulOp::shiftl, k);
} else {
/* ONLY DO REDUCTION FOR THE TWO CASES */
if ((d < 32) && (d > 0)) {
planMulLookup<inttype, width>(m, d, depth+1);
} else if (isPowerOf2(d)) {
int k = whichPowerOf2<inttype, width>(d);
m.append(MulOp::pushy); m.append(MulOp::shiftl, k);
}
/*
if (d < 0) {
planMulNeg<inttype, width>(m, d, depth);
} else if (d < 32) {
planMulLookup<inttype, width>(m, d, depth+1);
} else if (isPowerOf2(d)) {
int k = whichPowerOf2<inttype, width>(d);
m.append(MulOp::pushy); m.append(MulOp::shiftl, k);
} else {
planMulLarge<inttype, width>(m, d, depth+1);
}
*/
}
#ifdef TEST_OUTPUT
DEBUGPRINT2("planMul(" << d << ")", res);
for (int i=0; i<10000; i += 17) {
inttype num = i;
if (!(testMul<inttype, width>(m, res, d, num) &&
testMul<inttype, width>(m, res, d, -num) &&
testMul<inttype, width>(m, res, d, 0x7fffffff+num) &&
testMul<inttype, width>(m, res, d, 0x7fffffff-num))) {
return;
}
}
#endif
}
template <typename inttype, int width>
void planMulNeg(MulMethod &m, inttype d, int depth) {
DEBUGPRINT("planMulNeg(" << (int)d << ")");
if (depth == 1) {
// this seems to win by 2 cycles only in about 2/25000 cases
// and by 1 cycle only in about 1/50 cases
DEBUGPRINT("planMulNeg(" << (int)d << ") case 1");
MulMethod res1(m.foripf);
planMul<inttype, width>(res1, -d, depth+1); res1.append(MulOp::neg);
// this seems to win by 2 cycles in about 1/250 cases
// and by 1 cycle in 1/10 cases
// but that's not enough to justify a search
DEBUGPRINT("planMulNeg(" << (int) d << ") case 2");
MulMethod res2(m.foripf);
inttype dinv = ~d;
int numones = nlz<inttype, width>(dinv);
int shiftby = width - numones;
inttype newd = d + ((inttype)1 << shiftby);
if (shiftby <= m.SMALL_SHIFT_MAXBITS) {
DEBUGPRINT("planMulNeg(" << (int) d << ") case 2a, shiftby=" << shiftby);
res2.append(MulOp::pushy); res2.append(MulOp::neg);
planMul<inttype, width>(res2, newd, depth+1);
res2.append(MulOp::shladd, shiftby);
} else {
DEBUGPRINT("planMulNeg(" << (int) d << ") case 2b, shiftby=" << shiftby);
planMul<inttype, width>(res2, newd, depth+1);
res2.append(MulOp::pushy); res2.append(MulOp::shiftl, shiftby);
res2.append(MulOp::sub);
}
DEBUGPRINT("planMulNeg(" << (int) d << ") case 3");
MulMethod res3(m.foripf);
planMulCompound<inttype, width>(res3, d, depth+1);
int latency1 = res1.getCost();
int latency2 = res2.getCost();
int latency3 = res3.getCost();
if ((latency1 <= latency2) && (latency1 <= latency3)) {
if (latency2 < latency3) {
DEBUGPRINT("choose neg method 1 by " << latency2-latency1 << " over 2");
} else if (latency3 < latency2) {
DEBUGPRINT("choose neg method 1 by " << latency3-latency1 << " over 3");
} else {
DEBUGPRINT("choose neg method 1 by " << latency2-latency1 << " over 2 & 3");
}
m.append(res1);
} else if (latency2 < latency3) {
if (latency1 < latency3) {
DEBUGPRINT("choose neg method 2 by " << latency1 - latency2 << " over 1");
} if (latency1 > latency3) {
DEBUGPRINT("choose neg method 2 by " << latency3 - latency2 << " over 3");
} else {
DEBUGPRINT("choose neg method 2 by " << latency3 - latency2 << " over 1 & 3");
}
m.append(res2);
} else {
if (latency1 < latency2) {
DEBUGPRINT("choose neg method 3 by " << latency1 - latency3 << " over 1");
} if (latency1 > latency2) {
DEBUGPRINT("choose neg method 3 by " << latency2 - latency3 << " over 2");
} else {
DEBUGPRINT("choose neg method 3 by " << latency2 - latency3 << " over 1 & 2");
}
m.append(res3);
}
} else {
DEBUGPRINT("planMulNeg(" << (int) d << ") case 3a");
planMulCompound<inttype, width>(m, d, depth+1);
}
DEBUGPRINT("done planMulNeg(" << (int) d << ")");
}
template <typename inttype, int width>
void planMulFactor(MulMethod &m, inttype d, int depth)
{
DEBUGPRINT("planMulFactor(" << (int) d << ")");
MulMethod res2(m.foripf);
int shiftby = m.SMALL_SHIFT_MAXBITS;
inttype factor = (1 << shiftby) + 1;
// small factors
while (factor > 2) {
if ((d % factor) == 0) {
DEBUGPRINT("planMulFactor(" << (int) d << ") found factor " << (int) factor);
MulMethod res1(m.foripf);
inttype quot = d / factor;
planMul<inttype, width>(res1, quot, depth+1);
res1.append(MulOp::dup);
res1.append(MulOp::shladd, shiftby);
m.append(res1);
return;
}
factor = (factor >> 1) + 1;
shiftby -= 1;
}
// bigger factors
shiftby = width - 2;
factor = (1 << shiftby) + 1;
while (factor > (1 << m.SMALL_SHIFT_MAXBITS) + 1) {
if ((d % factor) == 0) {
DEBUGPRINT("planMulFactor(" << (int) d << ") found factor " << (int) factor);
MulMethod res1(m.foripf);
inttype quot = d / factor;
planMul<inttype, width>(res1, quot, depth+1);
res1.append(MulOp::dup);
res1.append(MulOp::shiftl, shiftby);
res1.append(MulOp::add);
m.append(res1);
return;
}
factor = (factor >> 1) + 1;
shiftby -= 1;
}
// bigger factors
shiftby = width - 2;
factor = (1 << (int) shiftby) - 1;
while (factor > 3) {
if ((d % factor) == 0) {
MulMethod res1(m.foripf);
if (d > 0) {
DEBUGPRINT("planMulFactor(" << (int) d << ") found factor " << (int) factor);
inttype quot = d / factor;
planMul<inttype, width>(res1, quot, depth+1);
res1.append(MulOp::dup);
res1.append(MulOp::shiftl, shiftby);
res1.append(MulOp::swap);
res1.append(MulOp::sub);
m.append(res1);
return;
} else {
DEBUGPRINT("planMulFactor(" << (int) d << ") found factor " << (int) -factor);
inttype quot = -d / factor;
planMul<inttype, width>(res1, quot, depth+1);
res1.append(MulOp::dup);
res1.append(MulOp::shiftl, shiftby);
res1.append(MulOp::sub);
m.append(res1);
return;
}
}
factor = ((factor+1) >> 1) - 1;
shiftby -= 1;
}
DEBUGPRINT("done planMulFactor(" << (int) d << ")");
m.append(res2);
}
template <typename inttype, int width>
void planMulLarge(MulMethod &m, inttype d, int depth)
{
if (depth < 6) {
MulMethod res1(m.foripf);
planMulCompound<inttype, width>(res1, d, depth);
MulMethod res2(m.foripf);
planMulFactor<inttype, width>(res2, d, depth);
int latency1 = res1.getCost();
int latency2 = res2.getCost();
if (latency1 <= latency2) {
DEBUGPRINT("choose large method 1: compound");
m.append(res1);
} else {
DEBUGPRINT("choose large method 2: factor");
m.append(res2);
}
} else {
planMulCompound<inttype, width>(m, d, depth);
}
}
template <typename inttype, int width>
void planMulCompound(MulMethod &m, inttype d, int depth) {
assert((d < 0) || (d > 31));
DEBUGPRINT("planMulCompound(" << (int) d << ")");
inttype delta = d ^ (d >> 1); // bits differing from from bit to left
int numChanges = popcount<inttype, width>(delta);
int deltaright = ntz<inttype, width>(delta);
int deltaleft = nlz<inttype, width>(delta);
int bitswidth = width - deltaright - deltaleft - 1;
int small_shift_maxbits = m.SMALL_SHIFT_MAXBITS;
if ((numChanges < 4) ||
(bitswidth <= 5)) { // small enough to worry about sign/rightbit issues
DEBUGPRINT("case few changes");
// we assume we've dealt with <31 elsewhere, so we must have some shifting ahead of us.
if ((d & 1) == 1) {
inttype dinv = ~d;
DEBUGPRINT("case odd");
int rightones = ntz<inttype, width>(dinv);
if (rightones == 1) {
inttype dsub1 = d-1;
int dsub1rightzeros = ntz<inttype, width>(dsub1);
if (dsub1rightzeros > small_shift_maxbits) {
planMul<inttype, width>(m,
dsub1 >> small_shift_maxbits,
depth+1);
m.append(MulOp::pushy);
m.append(MulOp::shladd, small_shift_maxbits);
} else {
planMul<inttype, width>(m,
dsub1 >> dsub1rightzeros,
depth+1);
m.append(MulOp::pushy);
m.append(MulOp::shladd, dsub1rightzeros);
}
} else if (rightones <= small_shift_maxbits) {
planMul<inttype, width>(m, (d+1)>>rightones, depth+1);
m.append(MulOp::pushy);
m.append(MulOp::neg);
m.append(MulOp::shladd, rightones);
} else {
planMul<inttype, width>(m, (d+1)>>small_shift_maxbits,
depth+1);
m.append(MulOp::pushy); m.append(MulOp::neg);
m.append(MulOp::shladd, small_shift_maxbits);
}
} else {
DEBUGPRINT("case even");
int rightzeros = deltaright+1;
assert((rightzeros == ntz<inttype, width>(d)));
if (bitswidth <= 5) {
DEBUGPRINT("case narrow");
if ((rightzeros <= small_shift_maxbits) &&
(rightzeros + bitswidth >= 2 * small_shift_maxbits)) {
DEBUGPRINT("case smallshiftadd");
inttype newd = d + ((inttype)1 << rightzeros);
m.append(MulOp::pushy); m.append(MulOp::neg);
planMul<inttype, width>(m, newd, depth+1);
m.append(MulOp::shladd, rightzeros);
} else {
inttype newd = d >> rightzeros;
DEBUGPRINT("case smallshift");
planMul<inttype, width>(m, newd, depth+1);
m.append(MulOp::shiftl, rightzeros);
}
} else {
DEBUGPRINT("case even, wider");
inttype deltasubright = delta - ((inttype)1 << deltaright);
int deltasubright_ntz = ntz<inttype, width>(deltasubright);
int region2 = deltasubright_ntz - deltaright +1;
assert(region2 > 0);
if (region2 > 2) {
inttype newd = d + ((inttype)1 << rightzeros);
if (rightzeros <= small_shift_maxbits) {
DEBUGPRINT("case wide smalladdshift");
m.append(MulOp::pushy); m.append(MulOp::neg);
planMul<inttype, width>(m, newd, depth+1);
m.append(MulOp::shladd, rightzeros);
} else {
DEBUGPRINT("case wide sub of shift");
planMul<inttype, width>(m, newd, depth+1);
m.append(MulOp::pushy); m.append(MulOp::shiftl, rightzeros);
m.append(MulOp::sub);
}
} else if (region2 == 2) {
inttype newd = d - (3 << rightzeros);
DEBUGPRINT("case wide addshift3");
if (rightzeros <= small_shift_maxbits) {
m.append(MulOp::pushy); m.append(MulOp::pushy);
m.append(MulOp::shladd, 1);
planMul<inttype, width>(m, newd, depth+1);
m.append(MulOp::shladd, rightzeros);
} else {
DEBUGPRINT("case wide add shift3");
planMul<inttype, width>(m, newd >> small_shift_maxbits,
depth+1);
m.append(MulOp::pushy); m.append(MulOp::pushy);
m.append(MulOp::shladd, 1);
m.append(MulOp::shiftl, rightzeros);
m.append(MulOp::shladd, small_shift_maxbits);
}
} else { // region2 == 1
inttype newd = d - ((inttype)1 << rightzeros);
if (rightzeros <= small_shift_maxbits) {
DEBUGPRINT("case wide smalladdshift1");
m.append(MulOp::pushy);
planMul<inttype, width>(m, newd, depth+1);
m.append(MulOp::shladd, rightzeros);
} else {
DEBUGPRINT("case wide sub of shift");
planMul<inttype, width>(m, newd >> small_shift_maxbits,
depth+1);
m.append(MulOp::pushy);
m.append(MulOp::shiftl, rightzeros);
m.append(MulOp::shladd, small_shift_maxbits);
}
}
}
}
} else { // bitswidth > 5
assert(numChanges >= 2);
DEBUGPRINT("case wide");
if (numChanges == 2) {
DEBUGPRINT("case numChanges==2");
if ((numChanges&1)==0) {
DEBUGPRINT("case wide changes=2 even");
int rightzeros = deltaright+1;
int leftzeros = deltaleft;
assert((rightzeros == ntz<inttype, width>(d)));
assert((leftzeros == nlz<inttype, width>(d)));
if (rightzeros <= small_shift_maxbits) {
DEBUGPRINT("case wide smallshiftadd");
assert(rightzeros+bitswidth >= 2 * small_shift_maxbits);
m.append(MulOp::pushy); m.append(MulOp::neg);
m.append(MulOp::pushy); m.append(MulOp::shiftl, width - leftzeros);
m.append(MulOp::shladd, rightzeros);
} else {
DEBUGPRINT("case wide narrow sub");
m.append(MulOp::pushy); m.append(MulOp::shiftl, width - leftzeros);
m.append(MulOp::pushy); m.append(MulOp::shiftl, rightzeros);
m.append(MulOp::sub);
}
} else {
DEBUGPRINT("case wide changes=2 odd");
}
} else {
// first try: use point width/2 to split number
DEBUGPRINT("case numChanges>2");
int rightzeros = ((d & 1)!=0) ? 0 : deltaright+1;
#ifndef NDEBUG
int leftzeros = ((d & ((inttype)1<<(width-1))) != 0) ? 0 : deltaleft;
#endif
assert((rightzeros == ntz<inttype, width>(d)));
assert((leftzeros == nlz<inttype, width>(d)));
int k = (numChanges + 1) >> 2;
assert(k > 0); // we had numChanges>=3 above.
inttype stripright = d;
while (k > 0) {
// turn rightmost group of 0s into 1s, no effect if none
inttype striprightzeros = stripright | (stripright - 1);
// turn rightmost group of 1s into 0s, must be some
inttype srzinv = ~striprightzeros; // first invert
inttype sroinv = srzinv | (srzinv - 1); // strip 0s
stripright = ~sroinv; // invert back
k -= 1;
}
// now stripright is d but with 0s from about the middle
// of changes to the right
// find a point one place to the left of the set of 0s
int zeropoint = ntz<inttype, width>(stripright);
inttype zerobit = (inttype(1))<<zeropoint;
assert((d & zerobit) != 0);
assert((d & (zerobit>>1)) == 0);
DEBUGPRINT("zerobit is " << (int) zerobit);
DEBUGPRINT("zeropoint is " << (int) zeropoint);
inttype rightmask = zerobit - 1;
inttype leftmask = ~rightmask;
DEBUGPRINT("leftmask is " << (int) leftmask << ", rightmask is " << (int) rightmask);
inttype rightd = d & rightmask;
inttype leftd = d & leftmask;
DEBUGPRINT("leftd is " << (int) leftd << ", rightd is " << (int) rightd);
int leftdrightzeros = ntz<inttype, width>(leftd);
if (rightzeros == 0) {
DEBUGPRINT("case 0");
if (leftdrightzeros < small_shift_maxbits) {
DEBUGPRINT("case 0a");
planMul<inttype, width>(m, leftd >> leftdrightzeros,
depth+1);
planMul<inttype, width>(m, rightd, depth+1);
m.append(MulOp::shladd, leftdrightzeros);
} else {
planMul<inttype, width>(m, leftd, depth+1);
planMul<inttype, width>(m, rightd, depth+1);
m.append(MulOp::add);
}
} else if ((0 < rightzeros) && (rightzeros <= small_shift_maxbits)) {
DEBUGPRINT("case 1");
planMul<inttype, width>(m, rightd >> rightzeros, depth+1);
planMul<inttype, width>(m, leftd, depth+1);
m.append(MulOp::shladd, rightzeros);
} else {
DEBUGPRINT("case 3");
planMul<inttype, width>(m, leftd >> small_shift_maxbits,
depth+1);
planMul<inttype, width>(m, rightd, depth+1);
m.append(MulOp::shladd, small_shift_maxbits);
}
DEBUGPRINT("done");
}
}
}
template <typename inttype, int width>
void planMulLookup(MulMethod &m, inttype d, int depth) {
assert((d>=0) && (d < 32));
DEBUGPRINT("planMulLookup(" << (int) d << ")");
switch (d) {
case 0: m.append(MulOp::pushc, 0); break;
case 1: m.append(MulOp::pushy); break;
case 2: m.append(MulOp::pushy); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break;
case 3: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1); break;
case 4: m.append(MulOp::pushy); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 2); break;
case 5: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2); break;
case 6:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break;
case 7:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break;
case 8: m.append(MulOp::pushy); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 3); break;
case 9: m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 3); break;
case 10:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break;
case 11:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break;
case 12:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 2); break;
case 13:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushy); m.append(MulOp::shladd, 2); break;
case 14:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break;
case 15:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::dup); m.append(MulOp::shladd, 2); break;
case 16:
m.append(MulOp::pushy);
if (m.SMALL_SHIFT_MAXBITS == 4) {
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 4);
} else if (m.SMALL_SHIFT_MAXBITS == 3) {
m.append(MulOp::shiftl, 4);
} else {
assert(0);
}
break;
case 17:
m.append(MulOp::pushy); m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 3);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1);
break;
case 18:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 3);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break;
case 19:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 3);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break;
case 20:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 2); break;
case 21:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2);
m.append(MulOp::pushy); m.append(MulOp::shladd, 2); break;
case 22:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break;
case 23:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 2);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break;
case 24:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 3); break;
case 25:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushy); m.append(MulOp::shladd, 3); break;
case 26:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushy); m.append(MulOp::shladd, 2);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break;
case 27:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::dup); m.append(MulOp::shladd, 3); break;
case 28:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 2); break;
case 29:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1);
m.append(MulOp::pushy); m.append(MulOp::shladd, 2); break;
case 30:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::dup); m.append(MulOp::shladd, 2);
m.append(MulOp::pushc, 0); m.append(MulOp::shladd, 1); break;
case 31:
m.append(MulOp::pushy); m.append(MulOp::dup); m.append(MulOp::shladd, 1);
m.append(MulOp::dup); m.append(MulOp::shladd, 2);
m.append(MulOp::pushy); m.append(MulOp::shladd, 1); break;
default:
assert(0);
}
}
#ifdef STANDALONE_TEST
#include "testharness2.h"
#else // !STANDALONE_TEST
Opnd *
Simplifier::planMul32(I_32 multiplier, Opnd *opnd)
{
const OptimizerFlags& optimizerFlags = irManager.getOptimizerFlags();
MulMethod method(!optimizerFlags.ia32_code_gen);
planMul<I_32, 32>(method, multiplier, 1);
if (Log::isEnabled()) {
Log::out() << "in multiply(" << (int) multiplier << ", ";
opnd->print(Log::out());
Log::out() << "), method is ";
method.print(Log::out()); Log::out() << ::std::endl;
}
return method.genCode(opnd->getType(), this, opnd);
}
Opnd *
Simplifier::planMul64(int64 multiplier, Opnd *opnd)
{
const OptimizerFlags& optimizerFlags = irManager.getOptimizerFlags();
MulMethod method(!optimizerFlags.ia32_code_gen);
planMul<int64, 64>(method, multiplier, 1);
if (Log::isEnabled()) {
Log::out() << "in multiply(" << (int) multiplier << ", ";
opnd->print(Log::out());
Log::out() << "), method is ";
method.print(Log::out()); Log::out() << ::std::endl;
}
return method.genCode(opnd->getType(), this, opnd);
}
#endif
} //namespace Jitrino