/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Alexander Astapchuk
*/
/**
* @file
* @brief Encoder class and related definitions.
*/
#if !defined(__ENC_H_INCLUDED__)
#define __ENC_H_INCLUDED__
#include "jdefs.h"
#include <assert.h>
#include <stdarg.h>
#include <bitset>
#include <string>
#include <map>
#include <algorithm>
using std::string;
using std::bitset;
using std::map;
using std::max;
/**
* @brief Size of stack slot affected by a regular PUSH/POP instruction.
*
* Normally equals the size of a general-purpose register on the platform.
*/
#define STACK_SLOT_SIZE (sizeof(void*))
/**
* @brief Rounds the given \c bytes up to a whole number of stack slots.
*/
#define STACK_SIZE(bytes) \
(((bytes) + (STACK_SLOT_SIZE-1)) & ~(STACK_SLOT_SIZE-1))
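/*
* A worked example of the rounding, assuming a 32-bit platform where
* STACK_SLOT_SIZE == 4:
*
*   STACK_SIZE(0)  == 0   // nothing to allocate
*   STACK_SIZE(5)  == 8   // 5 bytes occupy two 4-byte slots
*   STACK_SIZE(8)  == 8   // already slot-aligned
*   STACK_SIZE(13) == 16  // rounds up to four slots
*/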
namespace Jitrino {
namespace Jet {
class CallSig;
class Encoder;
/**
* @brief A signed integer type with the same size as a pointer.
*/
typedef POINTER_SIZE_SINT int_ptr;
/**
* @brief An unsigned integer type with the same size as a pointer.
*/
typedef POINTER_SIZE_INT uint_ptr;
/**
* @brief A dynamically growing byte array.
*
* Class CodeStream represents a dynamically growing byte array which
* always provides a writable buffer of at least a minimal guaranteed
* size (#BUFFER_ZONE).
*
* The usage is as follows:
* @code
* CodeStream cs;
* cs.init(INITIALLY_EXPECTED_CODE_SIZE);
* unsigned p_start = cs.ipoff();
* char * p = cs.ip();
* memcpy(p, otherP, someSize_less_than_BUFFER_ZONE);
* cs.ip(p + someSize_less_than_BUFFER_ZONE);
* ...
* unsigned next_p = cs.ipoff();
* @endcode
*/
class CodeStream {
public:
CodeStream() { m_buf = NULL; m_size = 0; }
~CodeStream() { if (m_buf) { free(m_buf); } };
/**
* @brief Performs initial memory allocation.
*
* Initially \c bytes bytes are allocated; the buffer then grows when necessary.
*/
void init(unsigned bytes)
{
resize(bytes);
}
/**
* @brief Returns the address of the next byte available for writing.
*
* The address returned is guaranteed to have at least #BUFFER_ZONE
* writable bytes behind it.
* The returned address is valid only until the next call to #ip(char*),
* which can trigger a memory reallocation, and thus should not be stored
* across such calls. Use ipoff() instead, which stays consistent during
* the lifetime of the CodeStream object.
*/
char * ip(void)
{
return m_buf + m_size;
}
char * ip(unsigned ipoff)
{
return data() + ipoff;
}
/**
* Sets the current address. This must be the address of the next available byte.
*/
void ip(char * _ip)
{
assert((U_32)(_ip - m_buf) == (uint64)(_ip - m_buf));
m_size = (U_32)(_ip - m_buf);
assert(m_size < total_size);
// The reallocation must be done here, and not in ip(void).
// Otherwise, the following usage template:
//     patch(pid, m_codeStream.data() + br_ipoff, ip())
// could fail if ip(void) triggered a reallocation.
if ((total_size - m_size) < BUFFER_ZONE) {
resize(total_size +
max((unsigned)BUFFER_ZONE, total_size*GROW_RATE/100));
}
}
/**
* Returns the offset of the next available byte in the stream.
*/
unsigned ipoff(void) const
{
return m_size;
}
/**
* Returns the number of bytes currently used in the stream.
*/
unsigned size(void) const
{
return m_size;
}
/**
* Provides direct access to the internal buffer. Never use more than
* size() bytes.
*/
char * data(void) const
{
return m_buf;
}
/**
* The minimum guaranteed size of the buffer returned by ip().
* This is also the minimum amount of free space in the buffer below
* which a reallocation to a bigger buffer is triggered. '16' here is
* the max size of a native instruction (at least on IA32/EM64T); the
* factor of 3 was chosen empirically.
*/
enum { BUFFER_ZONE = 16*3 };
private:
/**
* Performs the [re-]allocation of memory.
* The previously filled memory (if any) is copied into the newly
* allocated buffer.
*/
void resize(unsigned how_much)
{
total_size = max(how_much, (unsigned)BUFFER_ZONE);
m_buf = (char*)realloc(m_buf, total_size);
}
/**
* A pointer to the allocated buffer.
*/
char * m_buf;
/**
* Size of the allocated buffer.
*/
unsigned total_size;
/**
* Size of the memory currently in use.
*/
unsigned m_size;
/**
* The rate, in percent, at which the allocated buffer grows.
* The default value of 25 means that the buffer grows by 25% on each
* reallocation: i.e. if the first size passed to init() was 32, the
* allocations will be 32; 40 (=32+32*0.25); 50 (=40+40*0.25), etc.
*/
static const unsigned GROW_RATE = 25;
};
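/*
* A minimal sketch of the offset-based usage pattern recommended above.
* Offsets (ipoff()) stay valid across reallocations, while raw pointers
* obtained from ip() do not:
*
*   CodeStream cs;
*   cs.init(64);                 // initial guess; grows by GROW_RATE later
*   unsigned start = cs.ipoff(); // remember the position as an offset
*   char* p = cs.ip();           // at least BUFFER_ZONE writable bytes
*   p[0] = (char)0x90;           // e.g. a 1-byte NOP
*   cs.ip(p + 1);                // commit; may reallocate the buffer
*   // later: translate the offset back into an address -
*   char* insn = cs.ip(start);   // valid for the current buffer only
*/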
/**
* @brief Tests whether the specified #jtype is too big to fit into a
* single register on the current platform.
*
* The only such case currently is i64 on IA-32.
*
* This is a characteristic of the platform, which is why it is placed
* in enc.h.
*/
inline bool is_big(jtype jt)
{
#if defined(_EM64T_) || defined(_IPF_)
return false;
#else
return jt==i64;
#endif
}
/**
* @brief Returns the #jtype used to move an item of the specified type
* (or part of the item, in the case of a big type).
*
* The only special case currently is that jtmov() returns i32 for jt=i64.
*/
inline jtype jtmov(jtype jt)
{
return is_big(jt) ? i32 : jt;
}
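/*
* For illustration, how the two helpers above relate on different
* platforms:
*
*   // IA-32: a GR register is 32 bits wide
*   is_big(i64);  // => true - i64 occupies a register pair
*   jtmov(i64);   // => i32  - i64 items are moved in 32-bit halves
*
*   // EM64T/IPF: a GR register is 64 bits wide
*   is_big(i64);  // => false
*   jtmov(i64);   // => i64  - moved as a single item
*/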
/**
* @brief Returns true if the current platform is IA-32.
*
* Some presumptions are made about the IA-32 platform: there are no
* compressed references on it (not directly related to Encoder), and an
* address fits both into 32 bits and into the displacement of a complex
* address form.
*/
inline bool is_ia32(void)
{
#if defined(_IA32_)
return true;
#else
return false;
#endif
}
/**
* @brief Tests whether the specified value fits into a 32-bit value.
*/
inline bool fits32(jlong val)
{
return (INT_MIN <= val) && (val <= INT_MAX);
}
/**
* @brief Tests whether the specified address fits into a 32-bit value.
*
* Always true on IA-32; not necessarily true on 64-bit platforms.
*/
inline bool fits32(const void* addr)
{
#ifdef _IA32_
return true;
#else
return fits32((jlong)(int_ptr)addr);
#endif
}
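/*
* A few illustrative values:
*
*   fits32((jlong)INT_MAX);        // => true
*   fits32((jlong)INT_MAX + 1);    // => false - exceeds 32 bits
*   fits32((const void*)0x1000);   // => always true on IA-32; on 64-bit
*                                  //    platforms depends on the address
*/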
/**
* @brief Abstract Register.
*
* AR stands for Abstract Register.
*
* Every register may be uniquely identified either by its unique index
* (the common index) or by a combination of type and an index within
* that type (the type index).
*
* The common index is unique for each register and lies in the range
* (ar_idx(ar_x); ar_num), exclusive.
*
* The type index is unique only within the given group of registers and
* lies in the range [gr_idx(gr0); gr_idx(gr0+gr_total-1)] for GR
* registers and [fr_idx(fr0); fr_idx(fr0+fr_total-1)] for FR registers,
* inclusive.
*
*/
enum AR {
ar_x, fr_x = ar_x, gr_x = ar_x,
//
// General-purpose registers
//
// EAX, EBX, ECX, EDX, ESI, EDI,
gr0, gr1, gr2, gr3, gr4, gr5,
#ifdef _EM64T_
// R8, R9, R10, R11, R12, R13, R14, R15
gr6, gr7, gr8, gr9, gr10, gr11, gr12, gr13,
#endif
bp, sp,
//
// Floating-point registers
//
fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
#ifdef _EM64T_
fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15,
#endif
//
// Specials
//
fp0, // the topmost FPU stack register
//
#ifdef _EM64T_
gr_num=15, ///< not including sp
gr_total = 16, ///< including sp
fr_num=16,
#else
gr_num=7, ///< not including sp
gr_total = 8, ///< including sp
fr_num=8,
#endif
fr_total=fr_num,
ar_total = fr_total + gr_total, ar_num = ar_total,
};
/**
* @brief Returns true if the register is a floating-point register.
*/
inline bool is_f(AR ar)
{
return (fr0 <= ar && ar < (fr0+fr_total));
}
/**
* @brief Returns true if the register is a floating-point register.
*/
inline bool is_fr(AR ar)
{
return is_f(ar);
}
/**
* @brief Returns true if the register is a general-purpose register.
*/
inline bool is_gr(AR ar)
{
return (gr0 <= ar && ar < (gr0+gr_total));
}
/**
* @brief Constructs an AR with the given index (common index).
*/
inline AR _ar(unsigned idx)
{
assert(idx<ar_total);
return (AR)(gr0+idx);
}
/**
* @brief Constructs an AR from the given jtype and register index (type index).
*/
inline AR _ar(jtype jt, unsigned idx)
{
if (is_f(jt)) {
assert(idx<fr_total);
return (AR)(fr0+idx);
}
assert(idx<gr_total);
return (AR)(gr0+idx);
}
/**
* @brief Constructs a 'gr' register with the given type index.
*/
inline AR _gr(unsigned idx)
{
return _ar(jobj, idx);
}
/**
* @brief Constructs an 'fr' register with the given type index.
*/
inline AR _fr(unsigned idx)
{
return _ar(dbl64, idx);
}
/**
* @brief Returns the type index of the given 'fr' register.
*/
inline unsigned fr_idx(AR fr)
{
assert(is_f(fr));
return fr-fr0;
}
/**
* @brief Returns the type index of the given 'gr' register.
*/
inline unsigned gr_idx(AR gr)
{
assert(!is_f(gr));
return gr-gr0;
}
/**
* @brief Returns the common index of the given register.
*/
inline unsigned ar_idx(AR ar)
{
assert(ar-gr0 < ar_total);
return ar-gr0;
}
/**
* @brief Extracts the type index of the given register.
*/
inline unsigned type_idx(AR ar)
{
return is_f(ar) ? fr_idx(ar) : gr_idx(ar);
}
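/*
* How the two indexing schemes relate; the values shown assume IA-32
* (gr_total == 8, fr_total == 8) and use illustrative variable names:
*
*   AR g = _gr(0);   // first general-purpose register, gr0 (EAX)
*   gr_idx(g);       // => 0 - type index, within the GR group
*   ar_idx(g);       // => 0 - common index
*
*   AR f = _fr(0);   // first floating-point register, fr0
*   fr_idx(f);       // => 0 - type index, within the FR group
*   ar_idx(f);       // => 8 - common index: FRs follow the GRs
*/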
/**
* @brief Arithmetic-logic unit (ALU) operations supported by the Encoder.
*/
enum ALU {
alu_add, alu_sub, alu_mul, alu_div, alu_rem, alu_or, alu_xor, alu_and,
alu_cmp, alu_test, alu_shl, alu_shr, alu_sar,
/// total number of ALU operations
alu_count
};
/**
* @brief Condition codes used for Encoder::br().
* @see Encoder::br
*/
enum COND {
// signed
ge, le, gt, lt, eq, z=eq, ne, nz=ne,
// unsigned
ae, be, above, below,
//
cond_none
};
/**
* @brief Conditional branch hints.
* @see Encoder::br
*/
enum HINT {
taken, not_taken, hint_none
};
/**
* @brief Returns the number of callee-save registers.
*
* The presumption used: the set of callee-save registers is constant
* across a platform and does not depend on the calling convention used.
*/
unsigned gen_num_calle_save(void);
/**
* @brief Returns the i-th floating-point register for register-based
* calling conventions.
*
* The presumption used: the set of registers is constant across a
* platform and does not depend on the calling convention used.
* If we ever have to implement calling conventions with different sets
* of registers (e.g. fastcall6 and fastcall4), then this presumption
* will need to be revisited.
*/
AR get_cconv_fr(unsigned i, unsigned pos_in_args);
/**
* @brief Returns the i-th general-purpose register for register-based
* calling conventions.
* @see get_cconv_fr
*/
AR get_cconv_gr(unsigned i, unsigned pos_in_args);
/**
* @brief Kind of operand.
*/
enum OpndKind { opnd_imm, opnd_mem, opnd_reg };
/**
* @brief Represents an operand the Encoder works with.
*
* An Opnd can represent an immediate integer constant, a register
* operand, or a memory operand with the complex address form
* [base + index*scale + displacement].
*
* Once created, instances of the Opnd class are immutable. E.g. to
* change the type of an Opnd instance, one has to create a new Opnd,
* either with an as_type() call or with Opnd(jtype, const Opnd&).
*
*/
class Opnd {
public:
/**
* @brief Constructs an operand with
* kind == #opnd_imm, type == #i32, ival() == 0.
*/
Opnd() { clear(); }
/**
* @brief Clones the given Opnd, but with different type.
* @see as_type
*/
Opnd(jtype jt, const Opnd& that)
{
*this = that; m_jt = jt;
}
/**
* @brief Constructs an immediate operand of the given type and
* initializes the immediate field with the given value.
*
* \c any_val is wide enough to fit any necessary value -
* a pointer, a #dbl64 or an #i64.
*/
Opnd(jtype jt, int_ptr any_val)
{
clear(); m_jt = jt; m_kind = opnd_imm; m_lval = any_val;
}
/**
* @brief Constructs register operand.
*/
Opnd(jtype jt, AR ar)
{
clear(); m_jt = jt; m_kind = opnd_reg; m_reg = ar;
}
/**
* @brief Constructs a register operand with the widest possible type.
*
* That is, #jobj for GR registers and #dbl64 for FR registers.
*/
Opnd(AR ar)
{
clear(); m_jt = is_f(ar) ? dbl64 : jobj; m_kind = opnd_reg;
m_reg = ar;
}
/**
* @brief Constructs #i32 immediate operand.
*/
Opnd(int ival)
{
clear(); m_jt = i32; m_lval = ival;
}
/**
* @brief Constructs i32 immediate operand.
*/
Opnd(unsigned ival)
{
clear(); m_jt = i32; m_lval = ival;
}
#ifdef POINTER64
/**
* @brief Constructs #i64 immediate operand.
*
* @note Using Opnd(int_ptr) on 32-bit architecture leads to ambiguity
* with Opnd(int), so Opnd(int_ptr) is under #ifdef.
*/
Opnd(int_ptr lval)
{
clear(); m_jt = iplatf; m_lval = lval;
}
/**
* @brief Constructs i64 immediate operand.
*
* @note Using Opnd(uint_ptr) on 32-bit architecture leads to ambiguity
* with Opnd(unsigned), so Opnd(uint_ptr) is under #ifdef.
*/
Opnd(uint_ptr lval)
{
clear(); m_jt = iplatf; m_lval = lval;
}
#endif
/**
* @brief Constructs memory operand with no type (jvoid).
*/
Opnd(AR base, int disp, AR index = ar_x, unsigned scale=0)
{
clear();
m_kind = opnd_mem; m_jt = jvoid;
m_base = base; m_index = index;
m_scale = scale; m_disp = disp;
}
/**
* @brief Constructs memory operand.
*/
Opnd(jtype jt, AR base, int disp, AR index = ar_x, unsigned scale=0)
{
clear();
m_kind = opnd_mem; m_jt = jt;
m_base = base; m_index = index;
m_scale = scale; m_disp = disp;
}
#ifdef _IA32_
/**
* @brief Constructs a memory operand; the given pointer is stored as
* the displacement.
* @note IA-32 only.
*/
Opnd(jtype jt, AR base, const void* disp)
{
clear();
m_kind = opnd_mem; m_jt = jt;
m_base = base; m_index = ar_x;
m_scale = 0; m_disp = (int)disp;
}
#endif
/**
* @brief Returns kind of this operand.
*/
OpndKind kind(void) const
{
return m_kind;
}
/**
* @brief Returns type of this operand.
*/
jtype jt(void) const
{
return m_jt;
}
/**
* @brief Tests whether this operand is register operand.
*/
bool is_reg(void) const { return kind() == opnd_reg; }
/**
* @brief Tests whether this operand is memory operand.
*/
bool is_mem(void) const { return kind() == opnd_mem; }
/**
* @brief Tests whether this operand is immediate operand.
*/
bool is_imm(void) const { return kind() == opnd_imm; }
/**
* @brief Tests two operands for equality.
*
* For memory operands, types of operands are ignored (so it only tests
* whether two operands refer to the same memory location).
*
* For immediate operands, types are taken into account (that means
* that zero of i64 type will \b not be equal to zero of i32 type).
*/
bool operator==(const Opnd& that) const
{
if (kind() != that.kind()) return false;
if (is_reg()) return reg() == that.reg();
if (is_mem()) {
// no test for jt() - it's intentional
return base() == that.base() &&
disp() == that.disp() &&
index() == that.index() &&
scale() == that.scale();
}
assert(is_imm());
if (jt() != that.jt()) return false;
return m_lval == that.m_lval;
}
/**
* @brief The inverse of operator==.
*/
bool operator!=(const Opnd& that) const { return !(*this==that); }
/**
* @brief Returns AR for register operand, or #ar_x for operands of
* other kinds.
*/
AR reg(void) const { return m_kind == opnd_reg ? m_reg : ar_x; }
/**
* Returns integer value for immediate operand, or 0 for operands of
* other kinds.
*/
int ival(void) const { return is_imm() ? (int)m_lval : 0; }
/**
* Returns long value for immediate operand, or 0 for
* operands of other kinds.
*/
int_ptr lval(void) const { return is_imm() ? m_lval : 0; }
/**
* Returns base register for memory operand, or ar_x for operands of
* other kinds.
*/
AR base(void) const { return m_kind == opnd_mem ? m_base : ar_x; }
/**
* Returns index register for memory operand, or ar_x for operands of
* other kinds.
*/
AR index(void) const { return m_kind == opnd_mem ? m_index : ar_x; }
/**
* Returns displacement of complex address form for memory operand, or
* 0 for operands of other kinds.
*/
int disp(void) const { return m_kind == opnd_mem ? m_disp : 0; }
/**
* Returns scale of complex address form for memory operand, or 0 for
* operands of other kinds.
*/
unsigned scale(void) const { return m_kind == opnd_mem ? m_scale : 0; }
/**
* Returns an Opnd which differs from this Opnd only by its type.
* @see Opnd(jtype, const Opnd&)
*/
Opnd as_type(jtype jt) const
{
if (m_jt == jt) {
return *this;
}
Opnd res(*this);
res.m_jt = jt;
return res;
}
private:
/**
* Initializes Opnd instance with default values.
*/
void clear(void)
{
m_kind = opnd_imm;
m_jt = i32;
m_base = m_index = ar_x;
m_disp = 0;
m_lval = 0;
m_scale = 0;
}
/**
* Kind of operand.
*/
OpndKind m_kind;
/**
* Type of operand.
*/
jtype m_jt;
union {
/**
* AR for register operand.
*/
AR m_reg;
/**
* Displacement for memory operand.
*/
int m_disp;
/**
* Integer or long value of immediate operand.
*/
int_ptr m_lval;
};
/**
* Base register for memory operand.
*/
AR m_base;
/**
* Index register for memory operand.
*/
AR m_index;
/**
* Scale for memory operand.
*/
unsigned m_scale;
};
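/*
* Illustrative Opnd constructions (the register choices are arbitrary):
*
*   Opnd imm(42);                        // i32 immediate, ival() == 42
*   Opnd reg(i32, _gr(0));               // gr0 used as a 32-bit register
*   Opnd mem(i32, _gr(1), 8);            // dword at [gr1 + 8]
*   Opnd arr(i32, _gr(1), 0, _gr(2), 4); // dword at [gr1 + gr2*4]
*
*   // Opnds are immutable; retyping produces a new instance:
*   Opnd wide = mem.as_type(i64);        // same location, i64 type
*   assert(mem == wide);                 // memory equality ignores type
*/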
/**
* @brief Generation of code for an abstract CPU.
*
* Class Encoder is used to generate CPU instructions in a CPU-independent
* manner.
*
* The Encoder's function set represents an abstract CPU which has 2 sets
* of registers - general-purpose (named GP or GR) and floating-point
* (named FP or FR) - and which has memory and a memory stack.
*
* The Encoder is designed to hide specifics of the underlying platform
* as much as possible, so most characteristics are the same everywhere:
* - an FR reg may hold both #dbl64 and #flt32
* - FR operations may have either memory or an FR reg as the second
* operand, but not an immediate
* - a GR reg is wide enough to carry an I_32
* - a GR reg is wide enough to carry a memory address
* - memory may be addressed using a complex address form consisting of
* base and index registers, a displacement and a scale for the index.
* The scale may be any of the following: 1, 2, 4, 8.
*
* Though some differences still exist:
* - a GR reg may \b not be wide enough to fit the #i64 type
* (is_big(i64)==true)
* - an arbitrary address may not fit into the displacement field of the
* complex address form. If is_ia32()==true, then an address always fits
* into the displacement.
*
* Special emulations are performed for the following cases:
* - (Intel 64) mov [mem64], imm64 -
* the operation is generated as 2 moves of imm32
* - (IA-32) operations that involve 8-bit access to EBP, ESI or EDI -
* in this case, the sequence 'XCHG reg, reg ; operation ; XCHG' is
* generated.
* - (all) 'PUSH fr' is emulated as
* 'sub sp, num_of_slots_for(dbl64) ; mov [sp], fr'. 'POP fr' is emulated
* the same way.
* - (IA-32) Only 'mov/ld fp0, mem' and 'mov/st mem, fp0' are
* allowed. In this case, FST/FLD instructions are generated. \b NOTE:
* this simulation is done only in the #fld and #fst methods; you can
* \b not do #mov with fp0. This limitation is intentional, to remove an
* additional check and branch from the hot execution path in #mov.
*
* The call() operation is made indirect only (through a GR register).
* This is done intentionally, to reduce differences in code generation
* between platforms - on IA-32 we can always do a relative CALL, while
* on Intel 64 the possibility depends on whether the distance between
* the CALL instruction and its target fits into 2GB. As the code is
* first generated into an internal buffer and then copied to its final
* location, the distance also changes, and this would complicate the
* code generation routine.
* In contrast, the <code>movp(gr, target); call(gr)</code> sequence
* works the same way on all platforms.
*
* The code is generated into an internal buffer represented by a
* CodeStream object.
*
* The Encoder also has support for \b patching of generated code.
*
* Patching is the process of changing some part of an instruction after
* it has been generated. Normally, this is used to finalize addresses
* that are not yet known at the time of code generation (for example,
* for a forward jump).
*
* The following instructions support patching: branches (br(COND)) and
* loads of an address into a GR register (#movp).
*
* When such an instruction is generated, a special \e patch \e record
* is stored internally in the Encoder. The patch record contains some
* info about the instruction - its length, offset, type (data or branch
* - see below), whether patching was already done for this instruction,
* and so on.
*
* Both methods accept additional user-defined arguments. They are not
* interpreted by the Encoder itself in any way, just associated with
* the instruction to be patched. In CodeGen the arguments are used to
* store the basic block and the instruction's PC.
*
* The method void patch(unsigned pid, void* inst_addr, void* data)
* performs the patching.
*
* \c pid is the 'patch id' returned by the appropriate #br() or #movp()
* call. This is also the offset of the instruction in the internal
* Encoder's buffer.
*
* \c inst_addr is the address of the instruction to patch, and \c data
* is the data to be stored into the instruction.
*
* In many cases, \c inst_addr points to the instruction in the internal
* Encoder's buffer, so the short version of the method,
* patch(unsigned, void*), exists.
*
* There are 2 kinds of patches - \e data and \e branch. A data patch
* is used with instructions that operate with data addresses, e.g.
* <code>mov gr, addr</code>. A branch patch is applicable to br()
* instructions, with the presumption that all branches are relative.
*
* The key difference is that when patching \e data, the address is
* stored as-is, without modification. When patching a branch, the
* offset between \c inst_addr and \c data (interpreted as the address
* of the target) is calculated, and that offset is stored into the
* instruction.
*
* @todo FPU-only support, without SSE, to work on P3s. The basic idea
* is to emulate 'mov fr, fr' using FXCH, and 'mov fr, mem' and
* 'mov mem, fr' using FLD, FST and FXCH.
*
* @todo IPF support. The basic idea is to hide one or two registers
* from the application and use them internally in the Encoder to
* emulate the complex address form and other operations that are not
* natively supported in IPF's instruction set.
*/
class Encoder {
public:
/**
* Constructs an Encoder with tracing disabled.
*/
Encoder() {
m_trace = false;
}
/**
* Tests whether tracing is enabled for this Encoder instance.
* @note Only meaningful when the JIT_TRACE macro is defined; otherwise
* always returns false.
* @see JIT_TRACE
* @see JET_PROTO
*/
bool is_trace_on(void) const
{
#ifdef JIT_TRACE
return m_trace;
#else
return false;
#endif
}
/**
* Tests whether the AR is callee-save.
*/
static bool is_callee_save(AR ar)
{
return isCalleeSave[ar_idx(ar)];
}
/**
* Generates MOV operation.
*/
void mov(const Opnd& op0, const Opnd& op1)
{
if (is_trace_on()) {
trace(string("mov")+"("+to_str(op0.jt())+")",
to_str(op0), to_str(op1));
}
mov_impl(op0, op1);
}
/**
* Generates a load of a constant address into a GR register.
* @see movp(AR, unsigned, unsigned)
*/
void movp(AR op0, const void *op1)
{
assert(op0 != ar_x);
assert(is_gr(op0));
if (is_trace_on()) {
trace("movP", to_str(op0), to_str(op1));
}
movp_impl(op0, op1);
}
/**
* Generates a load of an address into a GR register, for further
* patching.
* @param gr - register to load the address into
* @param udata - user data (not interpreted by Encoder)
* @param ubase - user data (not interpreted by Encoder)
* @see movp(AR, const void*)
*/
unsigned movp(AR gr, unsigned udata, unsigned ubase);
/**
* Generates a load of the address specified by the \c mem argument into
* the \c reg argument.
* @note \c reg must be a register operand and \c mem must be a memory
* operand.
*/
void lea(const Opnd& reg, const Opnd& mem);
/**
* Generates sign extension of I_8 from op1 into op0.
*/
void sx1(const Opnd& op0, const Opnd& op1);
/**
* Generates sign extension of int16 from op1 into op0.
*/
void sx2(const Opnd& op0, const Opnd& op1);
/**
* Generates sign extension of op1 into op0.
*/
void sx(const Opnd& op0, const Opnd& op1);
/**
* Generates zero extension of U_8 from op1 into op0.
*/
void zx1(const Opnd& op0, const Opnd& op1);
/**
* Generates zero extension of uint16 from op1 into op0.
*/
void zx2(const Opnd& op0, const Opnd& op1);
/**
* Generates ALU operation.
*/
void alu(ALU alu, const Opnd& op0, const Opnd& op1)
{
if (is_trace_on()) {
trace(to_str(alu), to_str(op0), to_str(op1));
}
alu_impl(alu, op0, op1);
}
/**
* Generates n-byte long NOP instruction.
*/
void nop(U_32 n) {
if (is_trace_on()) {
trace(string("nop"), to_str((int)n), string());
}
nop_impl(n);
}
/**
* Performs bitwise NOT operation.
*/
void bitwise_not(const Opnd& op0) {
if (is_trace_on()) {
trace(string("not"), to_str(op0), to_str(""));
}
not_impl(op0);
}
/**
* Generates CMOVxx operation.
*/
void cmovcc(COND cond, const Opnd& op0, const Opnd& op1)
{
if (is_trace_on()) {
trace(string("cmov:")+ to_str(cond), to_str(op0), to_str(op1));
}
cmovcc_impl(cond, op0, op1);
}
/**
* Generates CMPXCHG operation.
*/
void cmpxchg(bool lockPrefix, AR addrBaseReg, AR newReg, AR oldReg)
{
if (is_trace_on()) {
trace(string("cmpxchg:")+ (lockPrefix ? "(locked) ":"") + to_str(addrBaseReg), to_str(newReg), to_str(oldReg));
}
cmpxchg_impl(lockPrefix, addrBaseReg, newReg, oldReg);
}
/**
* Generates write for 64-bit volatile value
*/
void volatile64_set(Opnd& where, AR hi_part, AR lo_part)
{
if (is_trace_on()) {
trace(string("volatile64_set:") + to_str(where), to_str(hi_part), to_str(lo_part));
}
volatile64_op_impl(where, hi_part, lo_part, true);
}
/**
* Generates read for 64-bit volatile value
*/
void volatile64_get(Opnd& where, AR hi_part, AR lo_part)
{
if (is_trace_on()) {
trace(string("volatile64_get:") + to_str(where), to_str(hi_part), to_str(lo_part));
}
volatile64_op_impl(where, hi_part, lo_part, false);
}
/**
* Generates ALU operation between two registers.
*
* The registers are treated as being of type \c jt.
*/
void alu(jtype jt, ALU op, AR op0, AR op1)
{
alu(op, Opnd(jt, op0), Opnd(jt, op1));
}
/**
* Loads from memory into the specified register.
*
* Just a wrapper around mov().
* @note On IA32, fp0 loads are treated in a special way.
*/
void ld(jtype jt, AR ar, AR base, int disp=0, AR index = ar_x,
unsigned scale=0)
{
if (is_f(jt)) {
fld(jt, ar, base, disp, index, scale);
}
else {
mov(Opnd(jt, ar), Opnd(jt, base, disp, index, scale));
}
}
/**
* Stores from the specified register into memory.
* Just a wrapper around mov().
*/
void st(jtype jt, AR ar, AR base, int disp=0, AR index = gr_x,
unsigned scale=0)
{
if (is_f(jt)) {
fst(jt, ar, base, disp, index, scale);
}
else {
mov(Opnd(jt, base, disp, index, scale), Opnd(jt, ar));
}
}
/**
* Loads from memory into the specified FR register.
*
* Just a wrapper around mov().
* @note On IA32, fp0 loads are treated in a special way.
*/
void fld(jtype jt, AR ar, AR base, int disp=0, AR index = ar_x,
unsigned scale=0);
/**
* Stores from the specified FR register into memory.
*
* Just a wrapper around mov().
* @note On IA32, fp0 stores are treated in a special way.
*/
void fst(jtype jt, AR ar, AR base, int disp=0, AR index = gr_x,
unsigned scale=0);
/**
* Loads 8bit from memory into GR register.
*/
void ld1(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
ld(i8, ar, base, disp, ridx, scale);
}
/**
* Loads 16bit from memory into GR register.
*/
void ld2(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
ld(i16, ar, base, disp, ridx, scale);
}
/**
* Loads 32bit from memory into a register.
*/
void ld4(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
ld(is_fr(ar) ? flt32 : i32, ar, base, disp, ridx, scale);
}
/**
* Stores 8bit from GR register into memory.
*/
void st1(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
st(i8, ar, base, disp, ridx, scale);
}
/**
* Stores 16bit from GR register into memory.
*/
void st2(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
st(i16, ar, base, disp, ridx, scale);
}
/**
* Stores 32bit from GR or FR register into memory.
*/
void st4(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
st(is_fr(ar) ? flt32 : i32, ar, base, disp, ridx, scale);
}
/**
* Pushes the value onto stack.
* @note Push of FR registers is emulated (sub sp, n ; mov [sp], fr).
* @return Number of bytes spent from the stack - the number
* subtracted from #sp.
*/
int push(const Opnd& op0);
/**
* Pops out the value from stack.
* @note Pop of FR registers is emulated (mov fr, [sp] ; add sp, n).
* @return Number of bytes popped from the stack - the number added to
* #sp.
*/
int pop(const Opnd& op0);
/**
* Returns number of bytes needed to store all the registers.
*/
static unsigned get_all_regs_size(void)
{
return gr_num*STACK_SLOT_SIZE + fr_num*8;
}
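/*
* For reference, the arithmetic above works out as follows (4-byte
* stack slots on IA-32, 8-byte slots on EM64T):
*   IA-32:   7*4 + 8*8  ==  92 bytes
*   EM64T:  15*8 + 16*8 == 248 bytes
*/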
/**
* Pushes either all registers or all scratch registers onto the stack.
* The number of bytes spent on the stack is always rounded up to a
* multiple of 16.
*/
int push_all(bool includeCalleeSave=false);
/**
* Pops either all registers or all scratch registers from the stack.
* The number of bytes popped from the stack is always rounded up to a
* multiple of 16.
*/
int pop_all(bool includeCalleeSave=false);
/**
* Generates return instruction.
* @param pop_bytes - how many bytes to pop out from the stack after
* return.
*/
void ret(unsigned pop_bytes);
/**
* Generates an indirect call instruction.
*
* If the calling convention assumes that the caller restores the stack,
* then code to restore the stack is also generated.
*
* If \c check_stack is \c true and the calling convention obliges stack
* alignment, then code that checks this alignment is also generated; a
* trap() instruction is executed if the alignment requirement is not
* met.
*/
void call(const Opnd& target, const CallSig& ci,
bool check_stack = false);
/**
* Generates an indirect call to \c target through the specified
* register.
* May place constant arguments according to \c cs. The \c idx parameter
* specifies which argument to start from. If all arguments are already
* prepared, then set <code>idx = cs.count()</code>.
*
* If \c idx is 0 and any argument is passed via the stack, then the
* stack preparation sequence is generated
* (<code>sub sp, cs.size()</code>).
*
* If the calling convention assumes that the caller restores the stack,
* then the proper instructions are generated.
*
* If \c check_stack is \c true and the calling convention obliges stack
* alignment, then code that checks this alignment is also generated; a
* trap() instruction is executed if the alignment requirement is not
* met.
*/
void call(bool check_stack, AR gr, const void * target,
const CallSig& cs, unsigned idx, ...);
/**
* Same as call(...) but takes arguments to pass from \c va_list.
*/
void call_va(bool check_stack, AR ar, const void *target,
const CallSig& cs, unsigned idx, va_list& valist);
/**
*
* @todo The name may be confused with CodeGen's one; may think about
* renaming.
*/
void gen_args(const CallSig& cs, AR grtmp, unsigned idx, unsigned count, ...);
/**
* Generates conditional or unconditional branch.
* @param op - target operand
* @param cond - condition for conditional branch or cond_none
* @param hint - possible hint whether conditional branch is presumed
* to be taken or not
*/
void br(const Opnd& op, COND cond=cond_none, HINT hint=hint_none);
/**
* Generates conditional or unconditional branch for further patching.
* @param cond - condition for conditional branch or cond_none
* @param udata - user data (not interpreted by Encoder)
* @param ubase - user data (not interpreted by Encoder)
* @param hint - possible hint whether the conditional branch is presumed
* to be taken or not
* @return patching id (which is also ip offset of generated branch
* instruction)
*/
unsigned br(COND cond, unsigned udata, unsigned ubase,
HINT hint=hint_none);
/**
* Generates a software breakpoint.
*/
void trap(void);
/**
* Triggers a software breakpoint.
* @note The method does \b not generate a software breakpoint, but
* raises one in the current program instead - in a platform-dependent
* manner: DebugBreak() on Windows and raise(SIGTRAP) on Linux.
* @note To generate a software breakpoint, use trap().
* @see trap
*/
static void debug(void);
/**
* @brief Returns the current offset in the Encoder's internal buffer.
* That is the offset at which the next instruction will be generated.
*/
unsigned ipoff(void) const
{
return m_codeStream.ipoff();
}
/**
* @brief Returns the number of patch records registered in the current
* Encoder.
*/
unsigned patch_count(void) const
{
return (unsigned) m_patches.size();
}
/**
* @brief Returns info about next patch record.
* @param[out] ppid - patch id (which is also offset of instruction
* in the Encoder's internal buffer)
* @param[out] pudata - user data 1
* @param[out] pubase - user data 2
* @param[out] pdone - \b true if the instruction was patched already
* @returns \b true if the patch record is for data instruction, \b
* false for branch instruction.
*/
bool enum_patch_data(unsigned* ppid, unsigned* pudata,
unsigned* pubase, bool* pdone)
{
*ppid = iter->first;
const CodePatchItem& cpi = iter->second;
*pudata = cpi.udata;
*pubase = cpi.ubase;
*pdone = cpi.done;
return cpi.data;
}
/**
* @brief Begins enumeration of patch records.
*/
void * enum_start(void)
{
iter = m_patches.begin();
return NULL;
}
/**
* @brief Returns \b true if no more items remain to enumerate.
*/
bool enum_is_end(void *h)
{
return iter == m_patches.end();
}
/**
* @brief Advances the enumeration iterator to the next item.
*/
void enum_next(void * h)
{
assert(iter != m_patches.end());
++iter;
}
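/*
* A sketch of the intended enumeration pattern over patch records
* ('enc' denotes an Encoder instance):
*
*   for (void* h = enc.enum_start(); !enc.enum_is_end(h);
*        enc.enum_next(h)) {
*       unsigned pid, udata, ubase;
*       bool done;
*       bool is_data = enc.enum_patch_data(&pid, &udata, &ubase, &done);
*       // 'pid' is also the instruction's offset in the code buffer
*   }
*/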
/**
* @brief Patches the instruction identified by \c pid in the Encoder's
* internal buffer.
*/
void patch(unsigned pid, void * data)
{
// pid is also ipoff of the instruction
void * inst_addr = ip(pid);
patch(pid, inst_addr, data);
}
void patch(unsigned pid, void* inst_addr, void* data);
/**
* Returns the current 'ip' for the underlying code stream - that is,
* the 'ip' at which the next emitted instruction will begin.
* The returned ip is a pointer into an internal temporary code buffer.
*/
char * ip(void)
{
return m_codeStream.ip();
}
/**
* @brief Returns address in Encoder's internal buffer by the given
* offset.
*/
char * ip(unsigned ipoff)
{
return m_codeStream.ip(ipoff);
}
/**
* Sets the current ip for the internal code buffer.
*/
void ip(char * _ip)
{
m_codeStream.ip(_ip);
}
protected:
unsigned m_trace;
public:
/**
* Initializes the Encoder's internal data.
* Must be invoked before any use of the Encoder.
*/
static void init(void);
public:
/**
* Formats the given \c op into human-readable string.
*/
static string to_str(const Opnd& op);
/**
* Formats the given \c ar into human-readable string.
*
* Callee-save registers are presented in capital letters.
* @param ar - register to convert to string.
* @param platf - if \c true, the native name (e.g. EAX) is returned
* instead of the abstract one (e.g. gr0).
*/
static string to_str(AR ar, bool platf = false);
/**
* Formats the given complex address form into a human-readable string.
*/
static string to_str(AR base, int disp, AR index, unsigned scale);
/**
* Formats the given \c addr into human-readable string.
*/
static string to_str(const void * addr);
/**
* Formats the given integer into human-readable string.
*/
static string to_str(int i);
/**
* Formats the given ALU code into human-readable string.
*/
static string to_str(ALU op);
/**
* Formats the given condition code into human-readable string.
*/
static string to_str(COND cond);
/**
* Formats the given HINT into human-readable string.
*/
static string to_str(HINT hint);
/**
* Formats the given jtype into human-readable string.
*/
static string to_str(jtype jt);
protected:
void trace(const string& func, const string& op0, const string& op1);
/**
* Used to beautify debugging output for complex code sequences like
* push_all().
*/
string m_prefix;
/**
* An internal temporary buffer where the generated code is accumulated.
* Normally not used directly, but rather through the ip() method calls.
*/
CodeStream m_codeStream;
private:
/**
* Patch record.
*/
struct CodePatchItem {
/// length
unsigned len;
/// \b true for a data patch, \b false for a branch patch
bool data;
/// \b true if instruction was patched
bool done;
/// user data 1
unsigned udata;
/// user data 2
unsigned ubase;
};
/**
* Map of patch records.
*/
typedef map<unsigned, CodePatchItem> PATCH_MAP;
/**
* Storage of patch records.
*/
PATCH_MAP m_patches;
/**
* Iterator used during enumeration of patch records.
*/
PATCH_MAP::iterator iter;
/**
* Set of flags which registers are callee-save.
*/
static bitset<ar_num> isCalleeSave;
/**
* Creates patch record for current ipoff().
*/
unsigned reg_patch(bool data, unsigned udata, unsigned ubase_ipoff);
/**
* Finalizes current patch record (stores instruction length, etc).
*/
void reg_patch_end(unsigned pid);
//
// Platform-specific implementations
//
/// Implementation of mov().
void mov_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of not().
void not_impl(const Opnd& op0);
/// Implementation of alu().
void alu_impl(ALU op, const Opnd& op0, const Opnd& op1);
/// Implementation of nop().
void nop_impl(U_32 n);
/// Implementation of cmovcc().
void cmovcc_impl(COND c, const Opnd& op0, const Opnd& op1);
/// Implementation of cmpxchg().
void cmpxchg_impl(bool lockPrefix, AR addrReg, AR newReg, AR oldReg);
/// Implementation of volatile64 get and set ops().
void volatile64_op_impl(Opnd& where, AR hi_part, AR lo_part, bool is_put);
/// Implementation of lea().
void lea_impl(const Opnd& reg, const Opnd& mem);
/// Implementation of movp().
void movp_impl(AR op0, const void *);
/// Implementation of sx1().
void sx1_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of sx2().
void sx2_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of sx().
void sx_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of zx1().
void zx1_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of zx2().
void zx2_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of fld().
void fld_impl(jtype jt, AR op0, AR base, int disp, AR index,
unsigned scale);
/// Implementation of fst().
void fst_impl(jtype jt, AR op0, AR base, int disp, AR index,
unsigned scale);
/// Implementation of push().
int push_impl(const Opnd& op0);
/// Implementation of pop().
int pop_impl(const Opnd& op0);
/// Implementation of call().
void call_impl(const Opnd& target);
/// Implementation of ret().
void ret_impl(unsigned pop);
/// Implementation of br().
void br_impl(COND cond, HINT hint);
/// Implementation of br().
void br_impl(const Opnd& op, COND cond, HINT hint);
/// Converts \c ar into platform's register name.
static string to_str_impl(AR ar);
/// Implementation of trap().
void trap_impl(void);
/// Implementation of is_callee_save().
static bool is_callee_save_impl(AR gr);
};
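/*
* A minimal end-to-end sketch of the patching workflow described in the
* Encoder class comment. The condition, register and target are chosen
* arbitrarily; 'some_native_code' is a made-up address.
*
*   Encoder::init();                  // one-time static initialization
*   Encoder enc;
*   // a forward branch whose target is not yet known:
*   unsigned pid = enc.br(eq, 0, 0);  // udata/ubase are user-defined
*   // ... emit the code to be branched over ...
*   enc.patch(pid, enc.ip());         // branch now targets the current ip
*
*   // the indirect-only call pattern from the class comment:
*   enc.movp(_gr(0), some_native_code);
*   // ... then call(...) with an appropriate CallSig
*/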
/**
* Returns \b true if the \c ar is callee-save register.
*/
inline bool is_callee_save(AR ar)
{
return Encoder::is_callee_save(ar);
}
} // ~namespace Jet
} // ~namespace Jitrino
#endif // __ENC_H_INCLUDED__