/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Alexander Astapchuk
*/
/**
* @file
* @brief Encoder class and related definitions.
*/
#if !defined(__ENC_H_INCLUDED__)
#define __ENC_H_INCLUDED__
#include "jdefs.h"
#include <assert.h>
#include <stdarg.h>
#include <bitset>
#include <string>
#include <map>
#include <algorithm>
using std::string;
using std::bitset;
using std::map;
using std::max;
/**
* @brief Size of stack slot affected by a regular PUSH/POP instruction.
*
* Normally equals the size of a general-purpose register on the platform.
*/
#define STACK_SLOT_SIZE (sizeof(void*))
/**
* @brief Rounds the given \c bytes up to a whole number of stack slots.
*/
#define STACK_SIZE(bytes) \
(((bytes) + (STACK_SLOT_SIZE-1)) & ~(STACK_SLOT_SIZE-1))
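/*
* A worked example of the rounding, assuming a 32-bit platform where
* STACK_SLOT_SIZE == 4:
*
*   STACK_SIZE(0)  == 0   // nothing to allocate
*   STACK_SIZE(5)  == 8   // 5 bytes occupy two 4-byte slots
*   STACK_SIZE(8)  == 8   // already slot-aligned
*   STACK_SIZE(13) == 16  // rounds up to four slots
*/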
namespace Jitrino {
namespace Jet {
class CallSig;
class Encoder;
/**
* @brief A signed integer type with the same size as a pointer.
*/
typedef POINTER_SIZE_SINT int_ptr;
/**
* @brief An unsigned integer type with the same size as a pointer.
*/
typedef POINTER_SIZE_INT uint_ptr;
/**
* @brief A dynamically growing byte array.
*
* Class CodeStream represents a dynamically growing byte array which
* always provides a writable buffer of at least a minimal guaranteed
* size (#BUFFER_ZONE).
*
* The usage is as follows:
* @code
* CodeStream cs;
* cs.init(INITIALLY_EXPECTED_CODE_SIZE);
* unsigned p_start = cs.ipoff();
* char * p = cs.ip();
* memcpy(p, otherP, someSize_less_than_BUFFER_ZONE);
* cs.ip(p + someSize_less_than_BUFFER_ZONE);
* ...
* unsigned next_p = cs.ipoff();
* @endcode
*/
class CodeStream {
public:
CodeStream() { m_buf = NULL; m_size = 0; }
~CodeStream() { if (m_buf) { free(m_buf); } };
/**
* @brief Performs initial memory allocation.
*
* Initially \c bytes bytes are allocated; the buffer then grows when necessary.
*/
void init(unsigned bytes)
{
resize(bytes);
}
/**
* @brief Returns the address of the next byte available for writing.
*
* The address returned is guaranteed to have at least #BUFFER_ZONE
* writable bytes behind it.
* The returned address is valid only until the next call to #ip(char*),
* which can trigger a memory reallocation, and thus should not be stored
* across such calls. Use ipoff() instead, which stays consistent during
* the lifetime of the CodeStream object.
*/
char * ip(void)
{
return m_buf + m_size;
}
char * ip(unsigned ipoff)
{
return data() + ipoff;
}
/**
* Sets the current address. This must be the address of the next available byte.
*/
void ip(char * _ip)
{
assert((U_32)(_ip - m_buf) == (uint64)(_ip - m_buf));
m_size = (U_32)(_ip - m_buf);
assert(m_size < total_size);
// The reallocation must be done here, and not in ip(void).
// Otherwise, the following usage template:
//     patch(pid, m_codeStream.data() + br_ipoff, ip())
// could fail if ip(void) triggered a reallocation.
if ((total_size - m_size) < BUFFER_ZONE) {
resize(total_size +
max((unsigned)BUFFER_ZONE, total_size*GROW_RATE/100));
}
}
/**
* Returns the offset of the next available byte in the stream.
*/
unsigned ipoff(void) const
{
return m_size;
}
/**
* Returns the number of bytes currently used in the stream.
*/
unsigned size(void) const
{
return m_size;
}
/**
* Provides direct access to the internal buffer. Never use more than
* size() bytes.
*/
char * data(void) const
{
return m_buf;
}
/**
* The minimum guaranteed size of the buffer returned by ip().
* This is also the minimum amount of free space in the buffer below
* which a reallocation to a bigger buffer is triggered. '16' here is
* the max size of a native instruction (at least on IA32/EM64T); the
* factor of 3 was chosen empirically.
*/
enum { BUFFER_ZONE = 16*3 };
private:
/**
* Performs the [re-]allocation of memory.
* The previously filled memory (if any) is copied into the newly
* allocated buffer.
*/
void resize(unsigned how_much)
{
total_size = max(how_much, (unsigned)BUFFER_ZONE);
m_buf = (char*)realloc(m_buf, total_size);
}
/**
* A pointer to the allocated buffer.
*/
char * m_buf;
/**
* Size of the allocated buffer.
*/
unsigned total_size;
/**
* Size of the memory currently in use.
*/
unsigned m_size;
/**
* The rate, in percent, at which the allocated buffer grows.
* The default value of 25 means that the buffer grows by 25% on each
* reallocation: i.e. if the first size passed to init() was 32, the
* allocations will be 32; 40 (=32+32*0.25); 50 (=40+40*0.25), etc.
*/
static const unsigned GROW_RATE = 25;
};
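/*
* A minimal sketch of the offset-based usage pattern recommended above.
* Offsets (ipoff()) stay valid across reallocations, while raw pointers
* obtained from ip() do not:
*
*   CodeStream cs;
*   cs.init(64);                 // initial guess; grows by GROW_RATE later
*   unsigned start = cs.ipoff(); // remember the position as an offset
*   char* p = cs.ip();           // at least BUFFER_ZONE writable bytes
*   p[0] = (char)0x90;           // e.g. a 1-byte NOP
*   cs.ip(p + 1);                // commit; may reallocate the buffer
*   // later: translate the offset back into an address -
*   char* insn = cs.ip(start);   // valid for the current buffer only
*/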
/**
* @brief Tests whether the specified #jtype is too big to fit into a
* single register on the current platform.
*
* The only such case currently is i64 on IA-32.
*
* This is a characteristic of the platform, which is why it is placed
* in enc.h.
*/
inline bool is_big(jtype jt)
{
#if defined(_EM64T_) || defined(_IPF_)
return false;
#else
return jt==i64;
#endif
}
/**
* @brief Returns the #jtype used to move an item of the specified type
* (or part of the item, in the case of a big type).
*
* The only special case currently is that jtmov() returns i32 for jt=i64.
*/
inline jtype jtmov(jtype jt)
{
return is_big(jt) ? i32 : jt;
}
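/*
* For illustration, how the two helpers above relate on different
* platforms:
*
*   // IA-32: a GR register is 32 bits wide
*   is_big(i64);  // => true - i64 occupies a register pair
*   jtmov(i64);   // => i32  - i64 items are moved in 32-bit halves
*
*   // EM64T/IPF: a GR register is 64 bits wide
*   is_big(i64);  // => false
*   jtmov(i64);   // => i64  - moved as a single item
*/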
/**
* @brief Returns true if the current platform is IA-32.
*
* Some presumptions are made about the IA-32 platform: there are no
* compressed references on it (not directly related to Encoder), and an
* address fits both into 32 bits and into the displacement of a complex
* address form.
*/
inline bool is_ia32(void)
{
#if defined(_IA32_)
return true;
#else
return false;
#endif
}
/**
* @brief Tests whether the specified value fits into a 32-bit value.
*/
inline bool fits32(jlong val)
{
return (INT_MIN <= val) && (val <= INT_MAX);
}
/**
* @brief Tests whether the specified address fits into a 32-bit value.
*
* Always true on IA-32; not necessarily true on 64-bit platforms.
*/
inline bool fits32(const void* addr)
{
#ifdef _IA32_
return true;
#else
return fits32((jlong)(int_ptr)addr);
#endif
}
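/*
* A few illustrative values:
*
*   fits32((jlong)INT_MAX);        // => true
*   fits32((jlong)INT_MAX + 1);    // => false - exceeds 32 bits
*   fits32((const void*)0x1000);   // => always true on IA-32; on 64-bit
*                                  //    platforms depends on the address
*/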
/**
* @brief Abstract Register.
*
* AR stands for Abstract Register.
*
* Every register may be uniquely identified either by its unique index
* (the common index) or by a combination of type and an index within
* that type (the type index).
*
* The common index is unique for each register and lies in the range
* (ar_idx(ar_x); ar_num), exclusive.
*
* The type index is unique only within the given group of registers and
* lies in the range [gr_idx(gr0); gr_idx(gr0+gr_total-1)] for GR
* registers and [fr_idx(fr0); fr_idx(fr0+fr_total-1)] for FR registers,
* inclusive.
*
*/
enum AR {
ar_x, fr_x = ar_x, gr_x = ar_x,
//
// General-purpose registers
//
// EAX, EBX, ECX, EDX, ESI, EDI,
gr0, gr1, gr2, gr3, gr4, gr5,
#ifdef _EM64T_
// R8, R9, R10, R11, R12, R13, R14, R15
gr6, gr7, gr8, gr9, gr10, gr11, gr12, gr13,
#endif
bp, sp,
//
// Floating-point registers
//
fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
#ifdef _EM64T_
fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15,
#endif
//
// Specials
//
fp0, // the topmost FPU stack register
//
#ifdef _EM64T_
gr_num=15, ///< not including sp
gr_total = 16, ///< including sp
fr_num=16,
#else
gr_num=7, ///< not including sp
gr_total = 8, ///< including sp
fr_num=8,
#endif
fr_total=fr_num,
ar_total = fr_total + gr_total, ar_num = ar_total,
};
/**
* @brief Returns true if the register is a floating-point register.
*/
inline bool is_f(AR ar)
{
return (fr0 <= ar && ar < (fr0+fr_total));
}
/**
* @brief Returns true if the register is a floating-point register.
*/
inline bool is_fr(AR ar)
{
return is_f(ar);
}
/**
* @brief Returns true if the register is a general-purpose register.
*/
inline bool is_gr(AR ar)
{
return (gr0 <= ar && ar < (gr0+gr_total));
}
/**
* @brief Constructs an AR with the given index (common index).
*/
inline AR _ar(unsigned idx)
{
assert(idx<ar_total);
return (AR)(gr0+idx);
}
/**
* @brief Constructs an AR from the given jtype and register index (type index).
*/
inline AR _ar(jtype jt, unsigned idx)
{
if (is_f(jt)) {
assert(idx<fr_total);
return (AR)(fr0+idx);
}
assert(idx<gr_total);
return (AR)(gr0+idx);
}
/**
* @brief Constructs a 'gr' register with the given type index.
*/
inline AR _gr(unsigned idx)
{
return _ar(jobj, idx);
}
/**
* @brief Constructs an 'fr' register with the given type index.
*/
inline AR _fr(unsigned idx)
{
return _ar(dbl64, idx);
}
/**
* @brief Returns the type index of the given 'fr' register.
*/
inline unsigned fr_idx(AR fr)
{
assert(is_f(fr));
return fr-fr0;
}
/**
* @brief Returns the type index of the given 'gr' register.
*/
inline unsigned gr_idx(AR gr)
{
assert(!is_f(gr));
return gr-gr0;
}
/**
* @brief Returns the common index of the given register.
*/
inline unsigned ar_idx(AR ar)
{
assert(ar-gr0 < ar_total);
return ar-gr0;
}
/**
* @brief Extracts the type index of the given register.
*/
inline unsigned type_idx(AR ar)
{
return is_f(ar) ? fr_idx(ar) : gr_idx(ar);
}
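/*
* How the two indexing schemes relate; the values shown assume IA-32
* (gr_total == 8, fr_total == 8) and use illustrative variable names:
*
*   AR g = _gr(0);   // first general-purpose register, gr0 (EAX)
*   gr_idx(g);       // => 0 - type index, within the GR group
*   ar_idx(g);       // => 0 - common index
*
*   AR f = _fr(0);   // first floating-point register, fr0
*   fr_idx(f);       // => 0 - type index, within the FR group
*   ar_idx(f);       // => 8 - common index: FRs follow the GRs
*/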
/**
* @brief Arithmetic-logic unit (ALU) operations supported by the Encoder.
*/
enum ALU {
alu_add, alu_sub, alu_mul, alu_div, alu_rem, alu_or, alu_xor, alu_and,
alu_cmp, alu_test, alu_shl, alu_shr, alu_sar,
/// total number of ALU operations
alu_count
};
/**
* @brief Condition codes used for Encoder::br().
* @see Encoder::br
*/
enum COND {
// signed
ge, le, gt, lt, eq, z=eq, ne, nz=ne,
// unsigned
ae, be, above, below,
//
cond_none
};
/**
* @brief Conditional branch hints.
* @see Encoder::br
*/
enum HINT {
taken, not_taken, hint_none
};
/**
* @brief Returns the number of callee-save registers.
*
* The presumption used: the set of callee-save registers is constant
* across a platform and does not depend on the calling convention used.
*/
unsigned gen_num_calle_save(void);
/**
* @brief Returns the i-th floating-point register for register-based
* calling conventions.
*
* The presumption used: the set of registers is constant across a
* platform and does not depend on the calling convention used.
* If we ever have to implement calling conventions with different sets
* of registers (e.g. fastcall6 and fastcall4), then this presumption
* will need to be revisited.
*/
AR get_cconv_fr(unsigned i, unsigned pos_in_args);
/**
* @brief Returns the i-th general-purpose register for register-based
* calling conventions.
* @see get_cconv_fr
*/
AR get_cconv_gr(unsigned i, unsigned pos_in_args);
/**
* @brief Kind of operand.
*/
enum OpndKind { opnd_imm, opnd_mem, opnd_reg };
/**
* @brief Represents an operand the Encoder works with.
*
* An Opnd can represent an immediate integer constant, a register
* operand, or a memory operand with the complex address form
* [base + index*scale + displacement].
*
* Once created, instances of the Opnd class are immutable. E.g. to
* change the type of an Opnd instance, one has to create a new Opnd,
* either with an as_type() call or with Opnd(jtype, const Opnd&).
*
*/
class Opnd {
public:
/**
* @brief Constructs an operand with
* kind == #opnd_imm, type == #i32, ival() == 0.
*/
Opnd() { clear(); }
/**
* @brief Clones the given Opnd, but with different type.
* @see as_type
*/
Opnd(jtype jt, const Opnd& that)
{
*this = that; m_jt = jt;
}
/**
* @brief Constructs an immediate operand of the given type and
* initializes the immediate field with the given value.
*
* \c any_val is wide enough to fit any necessary value -
* a pointer, a #dbl64 or an #i64.
*/
Opnd(jtype jt, int_ptr any_val)
{
clear(); m_jt = jt; m_kind = opnd_imm; m_lval = any_val;
}
/**
* @brief Constructs register operand.
*/
Opnd(jtype jt, AR ar)
{
clear(); m_jt = jt; m_kind = opnd_reg; m_reg = ar;
}
/**
* @brief Constructs a register operand with the widest possible type.
*
* That is, #jobj for GR registers and #dbl64 for FR registers.
*/
Opnd(AR ar)
{
clear(); m_jt = is_f(ar) ? dbl64 : jobj; m_kind = opnd_reg;
m_reg = ar;
}
/**
* @brief Constructs #i32 immediate operand.
*/
Opnd(int ival)
{
clear(); m_jt = i32; m_lval = ival;
}
/**
* @brief Constructs i32 immediate operand.
*/
Opnd(unsigned ival)
{
clear(); m_jt = i32; m_lval = ival;
}
#ifdef POINTER64
/**
* @brief Constructs #i64 immediate operand.
*
* @note Using Opnd(int_ptr) on 32-bit architecture leads to ambiguity
* with Opnd(int), so Opnd(int_ptr) is under #ifdef.
*/
Opnd(int_ptr lval)
{
clear(); m_jt = iplatf; m_lval = lval;
}
/**
* @brief Constructs i64 immediate operand.
*
* @note Using Opnd(uint_ptr) on 32-bit architecture leads to ambiguity
* with Opnd(unsigned), so Opnd(uint_ptr) is under #ifdef.
*/
Opnd(uint_ptr lval)
{
clear(); m_jt = iplatf; m_lval = lval;
}
#endif
/**
* @brief Constructs memory operand with no type (jvoid).
*/
Opnd(AR base, int disp, AR index = ar_x, unsigned scale=0)
{
clear();
m_kind = opnd_mem; m_jt = jvoid;
m_base = base; m_index = index;
m_scale = scale; m_disp = disp;
}
/**
* @brief Constructs memory operand.
*/
Opnd(jtype jt, AR base, int disp, AR index = ar_x, unsigned scale=0)
{
clear();
m_kind = opnd_mem; m_jt = jt;
m_base = base; m_index = index;
m_scale = scale; m_disp = disp;
}
#ifdef _IA32_
/**
* @brief Constructs a memory operand; the given pointer is stored as
* the displacement.
* @note IA-32 only.
*/
Opnd(jtype jt, AR base, const void* disp)
{
clear();
m_kind = opnd_mem; m_jt = jt;
m_base = base; m_index = ar_x;
m_scale = 0; m_disp = (int)disp;
}
#endif
/**
* @brief Returns kind of this operand.
*/
OpndKind kind(void) const
{
return m_kind;
}
/**
* @brief Returns type of this operand.
*/
jtype jt(void) const
{
return m_jt;
}
/**
* @brief Tests whether this operand is register operand.
*/
bool is_reg(void) const { return kind() == opnd_reg; }
/**
* @brief Tests whether this operand is memory operand.
*/
bool is_mem(void) const { return kind() == opnd_mem; }
/**
* @brief Tests whether this operand is immediate operand.
*/
bool is_imm(void) const { return kind() == opnd_imm; }
/**
* @brief Tests two operands for equality.
*
* For memory operands, types of operands are ignored (so it only tests
* whether two operands refer to the same memory location).
*
* For immediate operands, types are taken into account (that means
* that zero of i64 type will \b not be equal to zero of i32 type).
*/
bool operator==(const Opnd& that) const
{
if (kind() != that.kind()) return false;
if (is_reg()) return reg() == that.reg();
if (is_mem()) {
// no test for jt() - it's intentional
return base() == that.base() &&
disp() == that.disp() &&
index() == that.index() &&
scale() == that.scale();
}
assert(is_imm());
if (jt() != that.jt()) return false;
return m_lval == that.m_lval;
}
/**
* @brief The inverse of operator==.
*/
bool operator!=(const Opnd& that) const { return !(*this==that); }
/**
* @brief Returns AR for register operand, or #ar_x for operands of
* other kinds.
*/
AR reg(void) const { return m_kind == opnd_reg ? m_reg : ar_x; }
/**
* Returns integer value for immediate operand, or 0 for operands of
* other kinds.
*/
int ival(void) const { return is_imm() ? (int)m_lval : 0; }
/**
* Returns long value for immediate operand, or 0 for
* operands of other kinds.
*/
int_ptr lval(void) const { return is_imm() ? m_lval : 0; }
/**
* Returns base register for memory operand, or ar_x for operands of
* other kinds.
*/
AR base(void) const { return m_kind == opnd_mem ? m_base : ar_x; }
/**
* Returns index register for memory operand, or ar_x for operands of
* other kinds.
*/
AR index(void) const { return m_kind == opnd_mem ? m_index : ar_x; }
/**
* Returns displacement of complex address form for memory operand, or
* 0 for operands of other kinds.
*/
int disp(void) const { return m_kind == opnd_mem ? m_disp : 0; }
/**
* Returns scale of complex address form for memory operand, or 0 for
* operands of other kinds.
*/
unsigned scale(void) const { return m_kind == opnd_mem ? m_scale : 0; }
/**
* Returns an Opnd which differs from this Opnd only by its type.
* @see Opnd(jtype, const Opnd&)
*/
Opnd as_type(jtype jt) const
{
if (m_jt == jt) {
return *this;
}
Opnd res(*this);
res.m_jt = jt;
return res;
}
private:
/**
* Initializes Opnd instance with default values.
*/
void clear(void)
{
m_kind = opnd_imm;
m_jt = i32;
m_base = m_index = ar_x;
m_disp = 0;
m_lval = 0;
m_scale = 0;
}
/**
* Kind of operand.
*/
OpndKind m_kind;
/**
* Type of operand.
*/
jtype m_jt;
union {
/**
* AR for register operand.
*/
AR m_reg;
/**
* Displacement for memory operand.
*/
int m_disp;
/**
* Integer or long value of immediate operand.
*/
int_ptr m_lval;
};
/**
* Base register for memory operand.
*/
AR m_base;
/**
* Index register for memory operand.
*/
AR m_index;
/**
* Scale for memory operand.
*/
unsigned m_scale;
};
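/*
* Illustrative Opnd constructions (the register choices are arbitrary):
*
*   Opnd imm(42);                        // i32 immediate, ival() == 42
*   Opnd reg(i32, _gr(0));               // gr0 used as a 32-bit register
*   Opnd mem(i32, _gr(1), 8);            // dword at [gr1 + 8]
*   Opnd arr(i32, _gr(1), 0, _gr(2), 4); // dword at [gr1 + gr2*4]
*
*   // Opnds are immutable; retyping produces a new instance:
*   Opnd wide = mem.as_type(i64);        // same location, i64 type
*   assert(mem == wide);                 // memory equality ignores type
*/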
/**
* @brief Generation of code for an abstract CPU.
*
* Class Encoder is used to generate CPU instructions in a CPU-independent
* manner.
*
* The Encoder's function set represents an abstract CPU which has 2 sets
* of registers - general-purpose (named GP or GR) and floating-point
* (named FP or FR) - and which has memory and a memory stack.
*
* The Encoder is designed to hide specifics of the underlying platform
* as much as possible, so most characteristics are the same everywhere:
* - an FR reg may hold both #dbl64 and #flt32
* - FR operations may have either memory or an FR reg as the second
* operand, but not an immediate
* - a GR reg is wide enough to carry an I_32
* - a GR reg is wide enough to carry a memory address
* - memory may be addressed using a complex address form consisting of
* base and index registers, a displacement and a scale for the index.
* The scale may be any of the following: 1, 2, 4, 8.
*
* Though some differences still exist:
* - a GR reg may \b not be wide enough to fit the #i64 type
* (is_big(i64)==true)
* - an arbitrary address may not fit into the displacement field of the
* complex address form. If is_ia32()==true, then an address always fits
* into the displacement.
*
* Special emulations are performed for the following cases:
* - (Intel 64) mov [mem64], imm64 -
* the operation is generated as 2 moves of imm32
* - (IA-32) operations that involve 8-bit access to EBP, ESI or EDI -
* in this case, the sequence 'XCHG reg, reg ; operation ; XCHG' is
* generated.
* - (all) 'PUSH fr' is emulated as
* 'sub sp, num_of_slots_for(dbl64) ; mov [sp], fr'. 'POP fr' is emulated
* the same way.
* - (IA-32) Only 'mov/ld fp0, mem' and 'mov/st mem, fp0' are
* allowed. In this case, FST/FLD instructions are generated. \b NOTE:
* this simulation is done only in the #fld and #fst methods; you can
* \b not do #mov with fp0. This limitation is intentional, to remove an
* additional check and branch from the hot execution path in #mov.
*
* The call() operation is made indirect only (through a GR register).
* This is done intentionally, to reduce differences in code generation
* between platforms - on IA-32 we can always do a relative CALL, while
* on Intel 64 the possibility depends on whether the distance between
* the CALL instruction and its target fits into 2GB. As the code is
* first generated into an internal buffer and then copied to its final
* location, the distance also changes, and this would complicate the
* code generation routine.
* In contrast, the <code>movp(gr, target); call(gr)</code> sequence
* works the same way on all platforms.
*
* The code is generated into an internal buffer represented by a
* CodeStream object.
*
* The Encoder also has support for \b patching of generated code.
*
* Patching is the process of changing some part of an instruction after
* it has been generated. Normally, this is used to finalize addresses
* that are not yet known at the time of code generation (for example,
* for a forward jump).
*
* The following instructions support patching: branches (br(COND)) and
* loads of an address into a GR register (#movp).
*
* When such an instruction is generated, a special \e patch \e record
* is stored internally in the Encoder. The patch record contains some
* info about the instruction - its length, offset, type (data or branch
* - see below), whether patching was already done for this instruction,
* and so on.
*
* Both methods accept additional user-defined arguments. They are not
* interpreted by the Encoder itself in any way, just associated with
* the instruction to be patched. In CodeGen the arguments are used to
* store the basic block and the instruction's PC.
*
* The method void patch(unsigned pid, void* inst_addr, void* data)
* performs the patching.
*
* \c pid is the 'patch id' returned by the appropriate #br() or #movp()
* call. This is also the offset of the instruction in the internal
* Encoder's buffer.
*
* \c inst_addr is the address of the instruction to patch, and \c data
* is the data to be stored into the instruction.
*
* In many cases, \c inst_addr points to the instruction in the internal
* Encoder's buffer, so the short version of the method,
* patch(unsigned, void*), exists.
*
* There are 2 kinds of patches - \e data and \e branch. A data patch
* is used with instructions that operate with data addresses, e.g.
* <code>mov gr, addr</code>. A branch patch is applicable to br()
* instructions, with the presumption that all branches are relative.
*
* The key difference is that when patching \e data, the address is
* stored as-is, without modification. When patching a branch, the
* offset between \c inst_addr and \c data (interpreted as the address
* of the target) is calculated, and that offset is stored into the
* instruction.
*
* @todo FPU-only support, without SSE, to work on P3s. The basic idea
* is to emulate 'mov fr, fr' using FXCH, and 'mov fr, mem' and
* 'mov mem, fr' using FLD, FST and FXCH.
*
* @todo IPF support. The basic idea is to hide one or two registers
* from the application and use them internally in the Encoder to
* emulate the complex address form and other operations that are not
* natively supported in IPF's instruction set.
*/
class Encoder {
public:
/**
* Constructs an Encoder with tracing disabled.
*/
Encoder() {
m_trace = false;
}
/**
* Tests whether tracing is enabled for this Encoder instance.
* @note Only meaningful when the JIT_TRACE macro is defined; otherwise
* always returns false.
* @see JIT_TRACE
* @see JET_PROTO
*/
bool is_trace_on(void) const
{
#ifdef JIT_TRACE
return m_trace;
#else
return false;
#endif
}
/**
* Tests whether the AR is callee-save.
*/
static bool is_callee_save(AR ar)
{
return isCalleeSave[ar_idx(ar)];
}
/**
* Generates MOV operation.
*/
void mov(const Opnd& op0, const Opnd& op1)
{
if (is_trace_on()) {
trace(string("mov")+"("+to_str(op0.jt())+")",
to_str(op0), to_str(op1));
}
mov_impl(op0, op1);
}
/**
* Generates a load of a constant address into a GR register.
* @see movp(AR, unsigned, unsigned)
*/
void movp(AR op0, const void *op1)
{
assert(op0 != ar_x);
assert(is_gr(op0));
if (is_trace_on()) {
trace("movP", to_str(op0), to_str(op1));
}
movp_impl(op0, op1);
}
/**
* Generates a load of an address into a GR register, for further
* patching.
* @param gr - register to load the address into
* @param udata - user data (not interpreted by Encoder)
* @param ubase - user data (not interpreted by Encoder)
* @see movp(AR, const void*)
*/
unsigned movp(AR gr, unsigned udata, unsigned ubase);
/**
* Generates a load of the address specified by the \c mem argument into
* the \c reg argument.
* @note \c reg must be a register operand and \c mem must be a memory
* operand.
*/
void lea(const Opnd& reg, const Opnd& mem);
/**
* Generates sign extension of I_8 from op1 into op0.
*/
void sx1(const Opnd& op0, const Opnd& op1);
/**
* Generates sign extension of int16 from op1 into op0.
*/
void sx2(const Opnd& op0, const Opnd& op1);
/**
* Generates sign extension of op1 into op0.
*/
void sx(const Opnd& op0, const Opnd& op1);
/**
* Generates zero extension of U_8 from op1 into op0.
*/
void zx1(const Opnd& op0, const Opnd& op1);
/**
* Generates zero extension of uint16 from op1 into op0.
*/
void zx2(const Opnd& op0, const Opnd& op1);
/**
* Generates ALU operation.
*/
void alu(ALU alu, const Opnd& op0, const Opnd& op1)
{
if (is_trace_on()) {
trace(to_str(alu), to_str(op0), to_str(op1));
}
alu_impl(alu, op0, op1);
}
/**
* Generates n-byte long NOP instruction.
*/
void nop(U_32 n) {
if (is_trace_on()) {
trace(string("nop"), to_str((int)n), string());
}
nop_impl(n);
}
/**
* Performs bitwise NOT operation.
*/
void bitwise_not(const Opnd& op0) {
if (is_trace_on()) {
trace(string("not"), to_str(op0), to_str(""));
}
not_impl(op0);
}
/**
* Generates CMOVxx operation.
*/
void cmovcc(COND cond, const Opnd& op0, const Opnd& op1)
{
if (is_trace_on()) {
trace(string("cmov:")+ to_str(cond), to_str(op0), to_str(op1));
}
cmovcc_impl(cond, op0, op1);
}
/**
* Generates CMPXCHG operation.
*/
void cmpxchg(bool lockPrefix, AR addrBaseReg, AR newReg, AR oldReg)
{
if (is_trace_on()) {
trace(string("cmpxchg:")+ (lockPrefix ? "(locked) ":"") + to_str(addrBaseReg), to_str(newReg), to_str(oldReg));
}
cmpxchg_impl(lockPrefix, addrBaseReg, newReg, oldReg);
}
/**
* Generates write for 64-bit volatile value
*/
void volatile64_set(Opnd& where, AR hi_part, AR lo_part)
{
if (is_trace_on()) {
trace(string("volatile64_set:") + to_str(where), to_str(hi_part), to_str(lo_part));
}
volatile64_op_impl(where, hi_part, lo_part, true);
}
/**
* Generates read for 64-bit volatile value
*/
void volatile64_get(Opnd& where, AR hi_part, AR lo_part)
{
if (is_trace_on()) {
trace(string("volatile64_get:") + to_str(where), to_str(hi_part), to_str(lo_part));
}
volatile64_op_impl(where, hi_part, lo_part, false);
}
/**
* Generates ALU operation between two registers.
*
* The registers are treated as being of type \c jt.
*/
void alu(jtype jt, ALU op, AR op0, AR op1)
{
alu(op, Opnd(jt, op0), Opnd(jt, op1));
}
/**
* Loads from memory into the specified register.
*
* Just a wrapper around mov().
* @note On IA32, fp0 loads are treated in a special way.
*/
void ld(jtype jt, AR ar, AR base, int disp=0, AR index = ar_x,
unsigned scale=0)
{
if (is_f(jt)) {
fld(jt, ar, base, disp, index, scale);
}
else {
mov(Opnd(jt, ar), Opnd(jt, base, disp, index, scale));
}
}
/**
* Stores from the specified register into memory.
* Just a wrapper around mov().
*/
void st(jtype jt, AR ar, AR base, int disp=0, AR index = gr_x,
unsigned scale=0)
{
if (is_f(jt)) {
fst(jt, ar, base, disp, index, scale);
}
else {
mov(Opnd(jt, base, disp, index, scale), Opnd(jt, ar));
}
}
/**
* Loads from memory into the specified FR register.
*
* Just a wrapper around mov().
* @note On IA32, fp0 loads are treated in a special way.
*/
void fld(jtype jt, AR ar, AR base, int disp=0, AR index = ar_x,
unsigned scale=0);
/**
* Stores from the specified FR register into memory.
*
* Just a wrapper around mov().
* @note On IA32, fp0 stores are treated in a special way.
*/
void fst(jtype jt, AR ar, AR base, int disp=0, AR index = gr_x,
unsigned scale=0);
/**
* Loads 8bit from memory into GR register.
*/
void ld1(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
ld(i8, ar, base, disp, ridx, scale);
}
/**
* Loads 16bit from memory into GR register.
*/
void ld2(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
ld(i16, ar, base, disp, ridx, scale);
}
/**
* Loads 32bit from memory into a register.
*/
void ld4(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
ld(is_fr(ar) ? flt32 : i32, ar, base, disp, ridx, scale);
}
/**
* Stores 8bit from GR register into memory.
*/
void st1(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
st(i8, ar, base, disp, ridx, scale);
}
/**
* Stores 16bit from GR register into memory.
*/
void st2(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
st(i16, ar, base, disp, ridx, scale);
}
/**
* Stores 32bit from GR or FR register into memory.
*/
void st4(AR ar, AR base, int disp=0, AR ridx = ar_x, unsigned scale=0)
{
st(is_fr(ar) ? flt32 : i32, ar, base, disp, ridx, scale);
}
/**
* Pushes the value onto stack.
* @note Push of FR registers is emulated (sub sp, n ; mov [sp], fr).
* @return Number of bytes spent from the stack - the number
* subtracted from #sp.
*/
int push(const Opnd& op0);
/**
* Pops out the value from stack.
* @note Pop of FR registers is emulated (mov fr, [sp] ; add sp, n).
* @return Number of bytes popped from the stack - the number added to
* #sp.
*/
int pop(const Opnd& op0);
/**
* Returns number of bytes needed to store all the registers.
*/
static unsigned get_all_regs_size(void)
{
return gr_num*STACK_SLOT_SIZE + fr_num*8;
}
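/*
* For reference, the arithmetic above works out as follows (4-byte
* stack slots on IA-32, 8-byte slots on EM64T):
*   IA-32:   7*4 + 8*8  ==  92 bytes
*   EM64T:  15*8 + 16*8 == 248 bytes
*/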
/**
* Pushes either all registers or all scratch registers onto the stack.
* The number of bytes spent on the stack is always rounded up to a
* multiple of 16.
*/
int push_all(bool includeCalleeSave=false);
/**
* Pops either all registers or all scratch registers from the stack.
* The number of bytes popped from the stack is always rounded up to a
* multiple of 16.
*/
int pop_all(bool includeCalleeSave=false);
/**
* Generates return instruction.
* @param pop_bytes - how many bytes to pop out from the stack after
* return.
*/
void ret(unsigned pop_bytes);
/**
* Generates an indirect call instruction.
*
* If the calling convention assumes that the caller restores the stack,
* then code to restore the stack is also generated.
*
* If \c check_stack is \c true and the calling convention obliges stack
* alignment, then code that checks this alignment is also generated; a
* trap() instruction is executed if the alignment requirement is not
* met.
*/
void call(const Opnd& target, const CallSig& ci,
bool check_stack = false);
/**
* Generates an indirect call to \c target through the specified
* register.
* May place constant arguments according to \c cs. The \c idx parameter
* specifies which argument to start from. If all arguments are already
* prepared, then set <code>idx = cs.count()</code>.
*
* If \c idx is 0 and any argument is passed via the stack, then the
* stack preparation sequence is generated
* (<code>sub sp, cs.size()</code>).
*
* If the calling convention assumes that the caller restores the stack,
* then the proper instructions are generated.
*
* If \c check_stack is \c true and the calling convention obliges stack
* alignment, then code that checks this alignment is also generated; a
* trap() instruction is executed if the alignment requirement is not
* met.
*/
void call(bool check_stack, AR gr, const void * target,
const CallSig& cs, unsigned idx, ...);
/**
* Same as call(...) but takes arguments to pass from \c va_list.
*/
void call_va(bool check_stack, AR ar, const void *target,
const CallSig& cs, unsigned idx, va_list& valist);
/**
*
* @todo The name may be confused with CodeGen's one; may think about
* renaming.
*/
void gen_args(const CallSig& cs, AR grtmp, unsigned idx, unsigned count, ...);
/**
* Generates conditional or unconditional branch.
* @param op - target operand
* @param cond - condition for conditional branch or cond_none
* @param hint - possible hint whether conditional branch is presumed
* to be taken or not
*/
void br(const Opnd& op, COND cond=cond_none, HINT hint=hint_none);
/**
* Generates conditional or unconditional branch for further patching.
* @param cond - condition for conditional branch or cond_none
* @param udata - user data (not interpreted by Encoder)
* @param ubase - user data (not interpreted by Encoder)
* @param hint - possible hint whether the conditional branch is presumed
* to be taken or not
* @return patching id (which is also ip offset of generated branch
* instruction)
*/
unsigned br(COND cond, unsigned udata, unsigned ubase,
HINT hint=hint_none);
/**
* Generates a software breakpoint.
*/
void trap(void);
/**
* Triggers a software breakpoint.
* @note The method does \b not generate a software breakpoint, but
* raises one in the current program instead - in a platform-dependent
* manner: DebugBreak() on Windows and raise(SIGTRAP) on Linux.
* @note To generate a software breakpoint, use trap().
* @see trap
*/
static void debug(void);
/**
* @brief Returns the current offset in the Encoder's internal buffer.
* That is the offset at which the next instruction will be generated.
*/
unsigned ipoff(void) const
{
return m_codeStream.ipoff();
}
/**
* @brief Returns the number of patch records registered in the current
* Encoder.
*/
unsigned patch_count(void) const
{
return (unsigned) m_patches.size();
}
/**
* @brief Returns info about next patch record.
* @param[out] ppid - patch id (which is also offset of instruction
* in the Encoder's internal buffer)
* @param[out] pudata - user data 1
* @param[out] pubase - user data 2
* @param[out] pdone - \b true if the instruction was patched already
* @returns \b true if the patch record is for data instruction, \b
* false for branch instruction.
*/
bool enum_patch_data(unsigned* ppid, unsigned* pudata,
unsigned* pubase, bool* pdone)
{
*ppid = iter->first;
const CodePatchItem& cpi = iter->second;
*pudata = cpi.udata;
*pubase = cpi.ubase;
*pdone = cpi.done;
return cpi.data;
}
/**
* @brief Begins enumeration of patch records.
*/
void * enum_start(void)
{
iter = m_patches.begin();
return NULL;
}
/**
* @brief Returns \b true if no more items remain to enumerate.
*/
bool enum_is_end(void *h)
{
return iter == m_patches.end();
}
/**
* @brief Advances the enumeration iterator to the next item.
*/
void enum_next(void * h)
{
assert(iter != m_patches.end());
++iter;
}
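/*
* A sketch of the intended enumeration pattern over patch records
* ('enc' denotes an Encoder instance):
*
*   for (void* h = enc.enum_start(); !enc.enum_is_end(h);
*        enc.enum_next(h)) {
*       unsigned pid, udata, ubase;
*       bool done;
*       bool is_data = enc.enum_patch_data(&pid, &udata, &ubase, &done);
*       // 'pid' is also the instruction's offset in the code buffer
*   }
*/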
/**
* @brief Patches the instruction identified by \c pid in the Encoder's
* internal buffer.
*/
void patch(unsigned pid, void * data)
{
// pid is also ipoff of the instruction
void * inst_addr = ip(pid);
patch(pid, inst_addr, data);
}
void patch(unsigned pid, void* inst_addr, void* data);
/**
* Returns the current 'ip' for the underlying code stream - that is,
* the 'ip' at which the next emitted instruction will begin.
* The returned ip is a pointer into an internal temporary code buffer.
*/
char * ip(void)
{
return m_codeStream.ip();
}
/**
* @brief Returns address in Encoder's internal buffer by the given
* offset.
*/
char * ip(unsigned ipoff)
{
return m_codeStream.ip(ipoff);
}
/**
* Sets the current ip for the internal code buffer.
*/
void ip(char * _ip)
{
m_codeStream.ip(_ip);
}
protected:
unsigned m_trace;
public:
/**
* Initializes the Encoder's internal data.
* Must be invoked before any use of the Encoder.
*/
static void init(void);
public:
/**
* Formats the given \c op into human-readable string.
*/
static string to_str(const Opnd& op);
/**
* Formats the given \c ar into human-readable string.
*
* Callee-save registers are presented in capital letters.
* @param ar - register to convert to string.
* @param platf - if \c true, the native name (e.g. EAX) is returned
* instead of the abstract one (e.g. gr0).
*/
static string to_str(AR ar, bool platf = false);
/**
* Formats the given complex address form into a human-readable string.
*/
static string to_str(AR base, int disp, AR index, unsigned scale);
/**
* Formats the given \c addr into human-readable string.
*/
static string to_str(const void * addr);
/**
* Formats the given integer into human-readable string.
*/
static string to_str(int i);
/**
* Formats the given ALU code into human-readable string.
*/
static string to_str(ALU op);
/**
* Formats the given condition code into human-readable string.
*/
static string to_str(COND cond);
/**
* Formats the given HINT into human-readable string.
*/
static string to_str(HINT hint);
/**
* Formats the given jtype into human-readable string.
*/
static string to_str(jtype jt);
protected:
void trace(const string& func, const string& op0, const string& op1);
/**
* Used to beautify debugging output for complex code sequences like
* push_all().
*/
string m_prefix;
/**
* An internal temporary buffer where the generated code is accumulated.
* Normally not used directly, but rather through the ip() method calls.
*/
CodeStream m_codeStream;
private:
/**
* Patch record.
*/
struct CodePatchItem {
/// length
unsigned len;
/// \b true for a data patch, \b false for a branch patch
bool data;
/// \b true if instruction was patched
bool done;
/// user data 1
unsigned udata;
/// user data 2
unsigned ubase;
};
/**
* Map of patch records.
*/
typedef map<unsigned, CodePatchItem> PATCH_MAP;
/**
* Storage of patch records.
*/
PATCH_MAP m_patches;
/**
* Iterator used during enumeration of patch records.
*/
PATCH_MAP::iterator iter;
/**
* Set of flags which registers are callee-save.
*/
static bitset<ar_num> isCalleeSave;
/**
* Creates patch record for current ipoff().
*/
unsigned reg_patch(bool data, unsigned udata, unsigned ubase_ipoff);
/**
* Finalizes current patch record (stores instruction length, etc).
*/
void reg_patch_end(unsigned pid);
//
// Platform-specific implementations
//
/// Implementation of mov().
void mov_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of not().
void not_impl(const Opnd& op0);
/// Implementation of alu().
void alu_impl(ALU op, const Opnd& op0, const Opnd& op1);
/// Implementation of nop().
void nop_impl(U_32 n);
/// Implementation of cmovcc().
void cmovcc_impl(COND c, const Opnd& op0, const Opnd& op1);
/// Implementation of cmpxchg().
void cmpxchg_impl(bool lockPrefix, AR addrReg, AR newReg, AR oldReg);
/// Implementation of volatile64 get and set ops().
void volatile64_op_impl(Opnd& where, AR hi_part, AR lo_part, bool is_put);
/// Implementation of lea().
void lea_impl(const Opnd& reg, const Opnd& mem);
/// Implementation of movp().
void movp_impl(AR op0, const void *);
/// Implementation of sx1().
void sx1_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of sx2().
void sx2_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of sx().
void sx_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of zx1().
void zx1_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of zx2().
void zx2_impl(const Opnd& op0, const Opnd& op1);
/// Implementation of fld().
void fld_impl(jtype jt, AR op0, AR base, int disp, AR index,
unsigned scale);
/// Implementation of fst().
void fst_impl(jtype jt, AR op0, AR base, int disp, AR index,
unsigned scale);
/// Implementation of push().
int push_impl(const Opnd& op0);
/// Implementation of pop().
int pop_impl(const Opnd& op0);
/// Implementation of call().
void call_impl(const Opnd& target);
/// Implementation of ret().
void ret_impl(unsigned pop);
/// Implementation of br().
void br_impl(COND cond, HINT hint);
/// Implementation of br().
void br_impl(const Opnd& op, COND cond, HINT hint);
/// Converts \c ar into platform's register name.
static string to_str_impl(AR ar);
/// Implementation of trap().
void trap_impl(void);
/// Implementation of is_callee_save().
static bool is_callee_save_impl(AR gr);
};
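/*
* A minimal end-to-end sketch of the patching workflow described in the
* Encoder class comment. The condition, register and target are chosen
* arbitrarily; 'some_native_code' is a made-up address.
*
*   Encoder::init();                  // one-time static initialization
*   Encoder enc;
*   // a forward branch whose target is not yet known:
*   unsigned pid = enc.br(eq, 0, 0);  // udata/ubase are user-defined
*   // ... emit the code to be branched over ...
*   enc.patch(pid, enc.ip());         // branch now targets the current ip
*
*   // the indirect-only call pattern from the class comment:
*   enc.movp(_gr(0), some_native_code);
*   // ... then call(...) with an appropriate CallSig
*/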
/**
* Returns \b true if the \c ar is callee-save register.
*/
inline bool is_callee_save(AR ar)
{
return Encoder::is_callee_save(ar);
}
} // ~namespace Jet
} // ~namespace Jitrino
#endif // __ENC_H_INCLUDED__