blob: bbf8515adcb2bcb55b4bd3126aedf28a690181d2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_DOMAIN "emitter.ipf"
#include "cxxlog.h"
#include <memory.h>
#include "open/types.h"
#include "Code_Emitter.h"
/////////////////////////////////////////////////////////////////////////////
// Statistics gathering
/////////////////////////////////////////////////////////////////////////////
#ifdef ENABLE_ENCODER_STATS
static unsigned n_new_bundles=0;
static unsigned n_dbl_slots=0, n_have_dbl_slots=0;
static unsigned n_st_m=0, n_st_i=0, n_st_f=0, n_st_b=0;
// compute number of double slots
void Merced_Code_Emitter::compute_n_dbl_slots() {
bool have_dbl_slot=false;
int n_dbl_slots_here=0, n_st_m_here=0, n_st_i_here=0, n_st_f_here=0,
n_st_b_here=0;
for (Bundle_IR * bdl = wbuf_first; bdl!= wbuf_last; bdl = incr_wbuf_ptr(bdl)) {
int n_empty = 0;
int k_full=0;
bool branch=false;
for (int i=0;i<ENC_N_SLOTS; i++) {
if (bdl->slots[i].is_empty()) {
n_empty ++;
have_dbl_slot=true;
}
else
k_full=i;
if (bdl->slots[i].syl_type == ST_b)
branch=true;
}
assert (n_empty >=0 && n_empty < ENC_N_SLOTS);
if (branch) {
n_dbl_slots_here=0;
have_dbl_slot=false;
n_st_m_here=0;
n_st_i_here=0;
n_st_f_here=0;
n_st_b_here=0;
}
else if (n_empty ==2) {
n_dbl_slots_here++;
switch(bdl->slots[k_full].syl_type) {
case ST_m: n_st_m++; break;
case ST_i: n_st_i++; break;
case ST_f: n_st_f++; break;
case ST_b: n_st_b++; break;
}
}
}
if (have_dbl_slot)
n_have_dbl_slots++;
n_dbl_slots+=n_dbl_slots_here;
n_st_m+=n_st_m_here;
n_st_i+=n_st_i_here;
n_st_f+=n_st_f_here;
n_st_b+=n_st_b_here;
}
#endif
/////////////////////////////////////////////////////////////////////////////
// Mappings
/////////////////////////////////////////////////////////////////////////////
const U_8 Merced_Code_Emitter::cmp_ext_to_special_instr[cmp_last] = {
ENC_SI_none, ENC_SI_none, ENC_SI_cmp_and, ENC_SI_cmp_or, ENC_SI_none
};
// Unavailable templates mapping:
// Syl_Type * stop bit * instr slot # -> templates that became unavailabe
// Answers the question:
// What templates become unavailable as a result of assigning an instruction with
// a given Syl_type to a given instr slot #;
// stop bit indicates wheather an instruction requires a stop bit in front of it.
// Each template takes one bit. Available templates are marked by 1, unavailable - by 0.
#define NO_STOP 0
#define STOP 1
// call it eligible templates
const uint16 Merced_Code_Emitter::unavailable_tmplts[ST_last_type+1][2][ENC_N_SLOTS] = {
// ST_null
{ {0,0,0},{0,0,0}}, // unused in encoder - error
// ST_n
{ {0,0,0},{0,0,0}}, // unused in encoder - error
// ST_a
{ // no stop bit
{ 0x53f7, // bbb-9
0x1193, // mlx-2,mSmi-4,mfi-5,mbb-8,bbb-9,mfb-11
0x0071 // miSi-1,mlx-2,mmf-6,**b-7:11
},
// stop bit
{ 0x0000, // error
0x0020, // all but mSmi-4
0x0002 // all but miSi-1
}
},
// ST_m
{ // no stop bit
{ 0x53f7, // bbb-9
0x1090, // all but mmi-3,mmf-6,mmb-10
0x0000 // error
},
// stop bit
{ 0x0000, // error
0x0020, // all but mSmi-4
0x0000 // error
}
},
// ST_i
{ // no stop bit
{ 0x0000, // error
0x0103, // all but mii-0, miSi-1, mib-7
0x0071 // all but mii-0, mmi-3,mSmi-4,mfi-5
},
// stop bit
{ 0x0000, // error
0x0000, // error
0x0002 // all but miSi-1
}
},
// ST_f
{ // no stop bit
{ 0x0000, // error
0x4040, // all but mfi-5, mfb-11
0x0080 // all but mmf-6
},
// stop bit
{ 0x0000, // error
0x0000, // error
0x0000 // error
}
},
// ST_b
{ // no stop bit
{ 0x0800, // all but bbb-9
0x0a00, // all but mbb-8,bbb-9
0x5b00 // all but mib,mbb,bbb,mmb,mfb - 7:11
},
// stop bit
{ 0x0000, // error
0x0000, // error
0x0000 // error
}
},
// ST_il
{ // no stop bit
{ 0x0000, // error
0x0004, // all but mlx-2
0x0000 // error
},
// stop bit
{ 0x0000, // error
0x0000, // error
0x0000 // error
}
},
// ST_bl
{ // no stop bit
{ 0x0000, // error
0x0000, // error
0x5b00 // all but mib,mbb,bbb,mmb,mfb - 7:11
},
// stop bit
{ 0x0000, // error
0x0000, // error
0x0000 // error
}
},
// ST_bn
{{0,0,0},{0,0,0}}, // unused in encoder - error
// ST_ma
{ // no stop bit
{ 0x53f7, // bbb-9
0x0000, // error
0x0000 // error
},
// stop bit
{ 0x0000, // error
0x0000, // error
0x0000 // error
}
},
// ST_br
{{0,0,0},{0,0,0}}, // unused in encoder - error
// ST_fc
{{0,0,0},{0,0,0}}, // unused in encoder - error
// ST_is
{{0,0,0},{0,0,0}}, // unused in encoder - error
};
///////////////////////////////////////////////////////////////////
// Constructor
///////////////////////////////////////////////////////////////////
Merced_Code_Emitter::Merced_Code_Emitter (tl::MemoryPool & m, unsigned byteCodeSize,
unsigned nTargets) :
mem_pool(m),
wbuf_first(wbuf), wbuf_last(wbuf), wbuf_end(wbuf+ENC_WBUF_LEN),
slot_end(slots+ENC_GL_N_SLOTS), last_empty_slot(0),
gl_first_empty_slot(0),
n_targets(nTargets), next_instr_is_target(false),
needs_patching(false), curr_offset(0), patches(0),
ireg_map(reg_map),
freg_map(reg_map+ENC_N_GEN_REG),
breg_map(reg_map+ENC_N_GEN_REG+ENC_N_FLOAT_REG),
areg_map(reg_map+ENC_N_GEN_REG+ENC_N_FLOAT_REG+ENC_N_BRANCH_REG+ENC_N_PRED_REG),
preg_map(reg_map+ENC_N_GEN_REG+ENC_N_FLOAT_REG+ENC_N_BRANCH_REG),
n_fast_reg(0),
curr_wr_reg_vector(false), curr_rd_reg_vector(false),
fast_reg_dep_check(true),
curr_bc_addr((unsigned)-1), curr_is_mem_access(false),
wbuf_is_empty(true),
coupled_instr_state(ENC_single_instr),
known_mem_type(true), exch_instr(true),
encoder(&encoder0)
{
assert(ENC_WBUF_LEN >=4);
// first bit of each register map entry is set to 1 to indicate that the entry is empty
memset(reg_map,0x80,ENC_N_REG);
// FIXME cannot use passed memory pool
VERIFY_SUCCESS(apr_allocator_create(&allocator));
arena = apr_allocator_alloc(allocator, estimate_mem_size(byteCodeSize));
arena->next = NULL;
// set up pointers to the slots within bundles
Instr_IR * instr=slots;
for (Bundle_IR * p=wbuf; p!=wbuf_end; p++,instr+=ENC_N_SLOTS)
p->slots=instr;
wbuf_last->init(fast_reg_dep_check);
reset_mem_type();
// create a table of branch targets
target_offset=(uint64 *)mem_pool.alloc(sizeof(uint64) * n_targets);
for (unsigned i=0;i<n_targets; i++)
target_offset[i]=ENC_NOT_A_TARGET;
#ifdef _DEBUG
emit_after_get_code_size=false;
target_offset_is_set = new bool[n_targets];
// gashiman - added "i" declaration because it is required by modern C++
for (unsigned i=0;i<n_targets;i++)
target_offset_is_set[i]=false;
#endif
#ifdef ENC_SLOW_REG_DEP
start_slow_dep();
#endif
brl_patches = new MCE_brl_patch_list();
}
Merced_Code_Emitter::~Merced_Code_Emitter() {
apr_allocator_destroy(allocator);
}
///////////////////////////////////////////////////////////////////
// Memory allocation
///////////////////////////////////////////////////////////////////
void * Merced_Code_Emitter::_alloc_space(size_t size) {
void * mem;
apr_memnode_t * new_arena;
if (size < (size_t)(arena->endp - arena->first_avail)) {
// we have enough space
mem = arena->first_avail;
arena->first_avail += size;
return mem;
}
new_arena = apr_allocator_alloc(allocator, size);
// insert new arena into chain
new_arena->next = arena;
arena = new_arena;
// allocate memory from new arena
mem = arena->first_avail;
arena->first_avail += size;
return mem;
}
// Estimate the memory size.
// IA32 heuristics:
// The estimated size needed for emitting code is 2/3 N * 12
// which is 8 * N.
// The reasoning is that there are approximately one 12 byte ia32
// instruction per 2/3 bytes of byte code. Also, we add 12 bytes
// for the arena header size.
// IPF heuristics:
// ???
void Merced_Code_Emitter::_free_arena(apr_memnode_t * node) {
if (node->next) {
apr_allocator_free(allocator, node->next);
node->next = NULL;
}
node->first_avail = (char *)node + APR_MEMNODE_T_SIZE;
}
unsigned Merced_Code_Emitter::estimate_mem_size(unsigned byteCodeSize) {
return (byteCodeSize * 8);
}
// Calculate the code size
unsigned Merced_Code_Emitter::get_size() {
#ifdef _DEBUG
emit_after_get_code_size=false;
#endif
if (!wbuf_is_empty)
emit_all();
unsigned size = 0;
for (apr_memnode_t *a = arena; a != NULL; a = a->next) {
char * mem_begin = (char *)a + APR_MEMNODE_T_SIZE;
size += (unsigned) (a->first_avail - mem_begin);
}
return size;
}
// Copy one arena to a buffer
char * Merced_Code_Emitter::_copy(char *buffer, apr_memnode_t *a) {
if (a == 0)
return buffer;
buffer = _copy(buffer,a->next);
char * mem_begin = (char *)a + APR_MEMNODE_T_SIZE;
unsigned size = (unsigned) (a->first_avail - mem_begin);
memcpy(buffer,mem_begin,size);
return buffer + size;
}
void Merced_Code_Emitter::copy(char *buffer) {
assert(!emit_after_get_code_size);
assert (coupled_instr_state == ENC_single_instr);
if (!wbuf_is_empty)
emit_all();
// copy arenas starting from first one
// arenas are organized in a LIFO order
_copy(buffer,arena);
// do code patching
apply_patches(buffer,target_offset);
apply_brl_patches(buffer);
_free_arena(arena);
}
uint64 Merced_Code_Emitter::code_check_sum() {
uint64 s=0;
for (apr_memnode_t * a = arena; a != 0; a = a->next) {
char * mem_begin = (char *)a + APR_MEMNODE_T_SIZE;
for (char * bp = mem_begin; bp != a->first_avail; bp++)
s += *bp;
}
return s;
}
///////////////////////////////////////////////////////////////////
// Naive code emission - one instruction per bundle
///////////////////////////////////////////////////////////////////
// Encode instruction slot by a given instruction
// Assumes that the instruction slot contains zeros
static void encode_slot(U_8* bundle, uint64 instr, int slot)
{
// Instr is in bits 0:40, little endian.
uint64 * u;
switch(slot) {
case 0:
// Slot 0 takes bits 5:45
u=(uint64 *)bundle;
*u |= instr << 5;
break;
case 1:
// Slot 1 takes bits 46:86, i.e. 46:63 and 0:22
u=(uint64 *)bundle;
*u |= instr << 46;
u++;
*u |= instr >> 18;
break;
case 2:
// Slot 2 takes bits 87:127, i.e. 23:63
u=(uint64 *)(bundle + 8);
*u |= instr << 23;
break;
default: DIE(("Unexpected slot"));
}
}
static inline void encode_template(U_8* bundle, EM_Templates tmplt, bool stop_bit)
{
*bundle |= (tmplt << 1);
if (stop_bit)
*bundle |= 0x1;
}
static inline void reset_bundle(U_8* bundle) {
memset(bundle,0,IPF_INSTRUCTION_LEN);
}
// Encode nop with 0 immediate.
// nop instruction should be contained in bits 63:23
static uint64 encode_nop_wo_imm(EM_Syllable_Type syl_type) {
uint64 nop_code=0;
switch (syl_type) {
case ST_m:
case ST_i:
case ST_f:
nop_code = 0x0008000000;
break;
case ST_b:
nop_code = 0x4000000000;
break;
default:
DIE(("Unexpected syllable type"));
}
return nop_code;
}
//////////////////////////////////////////////////////////////////////
// Miscelaneous definitions
//////////////////////////////////////////////////////////////////////
static uint64
nop_i = encode_nop_wo_imm(ST_i),
nop_m = encode_nop_wo_imm(ST_m),
nop_f = encode_nop_wo_imm(ST_f),
nop_b = encode_nop_wo_imm(ST_b);
static inline uint64 typed_nop(EM_Syllable_Type syl_type)
{
switch (syl_type) {
case ST_m: return nop_m;
case ST_i: return nop_i;
case ST_b: return nop_b;
case ST_f: return nop_f;
default: DIE(("Unexpected syllable type"));
}
return 0;
}
/////////////////////////////////////////////////////////////////////////////
// Dependency checks
/////////////////////////////////////////////////////////////////////////////
// switch to a slow (but universal) mode of the register dependency check
#ifdef ENC_SLOW_REG_DEP
void Merced_Code_Emitter::start_slow_dep() {
fast_reg_dep_check=false;
// place pointers to the bit vectors in the instructions
for (Bundle_IR * p=wbuf; p!=wbuf_end; p++) {
for (int sl_n=0; sl_n<ENC_N_SLOTS; sl_n++) {
Instr_IR * const instr = &(p->slots[sl_n]);
instr->written_regs.slow=new(mem_pool) Enc_All_Reg_BV(false);
instr->read_regs.slow=new(mem_pool) Enc_All_Reg_BV(false);
}
}
}
#else
void Merced_Code_Emitter::switch_to_slow_reg_dep_check() {
fast_reg_dep_check=false;
// create an array of bit vectors
Enc_All_Reg_BV * wr_reg_vec[ENC_N_WBUF_INSTR], * rd_reg_vec[ENC_N_WBUF_INSTR];
int i;
for (i=0; i<ENC_N_WBUF_INSTR; i++) {
wr_reg_vec[i]=new(mem_pool) Enc_All_Reg_BV(false);
rd_reg_vec[i]=new(mem_pool) Enc_All_Reg_BV(false);
}
// copy the register info from fast scheme storage to full bit vectors
for (int r=0;r<ENC_N_REG; r++) { // for all registers
U_8 pos = reg_map[r];
if (!(pos & 0x80)) { // if register is fast one
uint64 z=((uint64)0x1)<<pos;
i=0;
for (Bundle_IR * p=wbuf; p!=wbuf_end; p++) {
for (int sl_n=0;sl_n<ENC_N_SLOTS; sl_n++, i++) {
Instr_IR * const instr=&(p->slots[sl_n]);
if (instr->is_filled()) {
if (instr->written_regs.fast & z)
wr_reg_vec[i]->set(r);
if (instr->read_regs.fast & z)
rd_reg_vec[i]->set(r);
}
}
}
}
}
// place pointers to the bit vectors in the instructions
i=0;
for (Bundle_IR * p=wbuf; p!=wbuf_end; p++) {
for (int sl_n=0; sl_n<ENC_N_SLOTS; sl_n++, i++) {
Instr_IR * const instr = &(p->slots[sl_n]);
instr->written_regs.slow=wr_reg_vec[i];
instr->read_regs.slow=rd_reg_vec[i];
}
}
}
#endif
// switch one instruction IR to slow register dependency check mode
void Merced_Code_Emitter::switch_ir_to_slow_reg_dep_check(Instr_IR &ir,
Enc_All_Reg_BV* read_regs, Enc_All_Reg_BV* written_regs) {
// copy the register info from fast scheme storage to full bit vectors
for (int r=0;r<ENC_N_REG; r++) { // for all registers
U_8 pos = reg_map[r];
if (!(pos & 0x80)) { // if register is fast one
uint64 z=((uint64)0x1)<<pos;
if (ir.written_regs.fast & z)
written_regs->set(r);
if (ir.read_regs.fast & z)
read_regs->set(r);
}
}
ir.read_regs.slow=read_regs;
ir.written_regs.slow=written_regs;
}
// check if there is register dependency between two instructions
inline static bool exists_reg_dep_fast(Instr_IR * instr1, Instr_IR * instr2) {
assert(instr1->is_instr_head() && instr2->is_instr_head());
return ((instr1->written_regs.fast & instr2->written_regs.fast) ||
(instr1->written_regs.fast & instr2->read_regs.fast) ||
(instr1->read_regs.fast & instr2->written_regs.fast));
}
inline static bool exists_reg_dep_slow(Instr_IR * instr1, Instr_IR * instr2) {
assert (instr1->is_instr_head() && instr2->is_instr_head());
return (instr1->written_regs.slow->does_intersect(instr2->written_regs.slow) ||
instr1->written_regs.slow->does_intersect(instr2->read_regs.slow) ||
instr1->read_regs.slow->does_intersect(instr2->written_regs.slow));
}
inline static bool exists_reg_dep(Instr_IR * instr1, Instr_IR * instr2, int fast_mode) {
#ifdef ENC_SLOW_REG_DEP
return exists_reg_dep_slow(instr1,instr2);
#else
return (fast_mode ? exists_reg_dep_fast(instr1,instr2) :
exists_reg_dep_slow(instr1,instr2));
#endif
}
// check if there is w-r register dependency between instructions
inline static uint64 exists_wr_reg_dep_fast(Instr_IR * instr1, Instr_IR * instr2) {
return (instr1->written_regs.fast & instr2->read_regs.fast);
}
inline static uint64 exists_wr_reg_dep_slow(Instr_IR * instr1, Instr_IR * instr2) {
return instr1->written_regs.slow->does_intersect(instr2->read_regs.slow);
}
inline static uint64 exists_wr_reg_dep(Instr_IR * instr1, Instr_IR * instr2,int fast_mode) {
#ifdef ENC_SLOW_REG_DEP
return exists_wr_reg_dep_slow(instr1,instr2);
#else
return (fast_mode ? exists_wr_reg_dep_fast(instr1,instr2) :
exists_wr_reg_dep_slow(instr1,instr2));
#endif
}
// check if thre is w-w register dependency between instructions
inline static uint64 exists_ww_reg_dep_fast(Instr_IR * instr1, Instr_IR * instr2) {
return (instr1->written_regs.fast & instr2->written_regs.fast);
}
inline static uint64 exists_ww_reg_dep_slow(Instr_IR * instr1, Instr_IR * instr2) {
return instr1->written_regs.slow->does_intersect(instr2->written_regs.slow);
}
inline static uint64 exists_ww_reg_dep(Instr_IR * instr1, Instr_IR * instr2, int fast_mode) {
#ifdef ENC_SLOW_REG_DEP
return exists_ww_reg_dep_slow(instr1,instr2);
#else
return (fast_mode ? exists_ww_reg_dep_fast(instr1,instr2) :
exists_ww_reg_dep_slow(instr1,instr2));
#endif
}
// check if two instructions should belong to different instruction groups
inline static bool need_diff_instr_groups(Instr_IR * instr1, Instr_IR * instr2, int fast_mode) {
assert (instr1->is_instr_head() && instr2->is_instr_head());
// Special cases for RAW & WAW
// 1. no branch reg RAW dependency from mt-BR to BR
// 2. no pred.reg RAW dependency from icmp to BR
// 3. no branch reg WAW from br.call to mt-BR
// 4. no pred. reg WAW between compare instructions that are
// either all OR-type or all AND-type
bool dep = exists_wr_reg_dep(instr1,instr2,fast_mode) &&
!(((instr1->special_instr & ENC_SI_mtbr) ||
(instr1->special_instr & ENC_SI_icmp))
&& instr2->syl_type == ST_b);
if (!dep)
dep = exists_ww_reg_dep(instr1,instr2,fast_mode) &&
!(((instr1->special_instr & ENC_SI_brcall) &&
(instr2->special_instr & ENC_SI_mtbr))
|| (instr1->special_instr & instr2->special_instr &
(ENC_SI_cmp_and | ENC_SI_cmp_or))
);
return dep;
}
// check if one of the instructions accesses IP register
inline static bool exists_ip_dep(Instr_IR * instr1, Instr_IR * instr2) {
assert (instr1->is_instr_head() && instr2->is_instr_head());
return (instr1->special_instr & ENC_SI_mtip ||
instr2->special_instr & ENC_SI_mtip);
}
// check if one of the instructions is branch
inline static bool exists_br_dep(Instr_IR * instr1, Instr_IR * instr2) {
assert (instr1->is_instr_head() && instr2->is_instr_head());
return (instr1->syl_type == ST_b || instr2->syl_type == ST_b);
}
// check if there is a memory dependency
inline static bool exists_mem_dep(Instr_IR * instr1, Instr_IR * instr2) {
assert (instr1->is_instr_head() && instr2->is_instr_head());
return (instr1->is_mem_access() && instr2->is_mem_access() &&
instr1->mem_value == instr2->mem_value &&
instr1->mem_type == instr2->mem_type);
}
// check if there is an exception dependency
inline static bool exists_exception_dep(Instr_IR * instr1, Instr_IR * instr2) {
assert (instr1->is_instr_head() && instr2->is_instr_head());
return (instr1->may_throw_exc() || instr2->may_throw_exc());
}
inline static bool may_exchange_instr(Instr_IR * instr1, Instr_IR * instr2, int fast_mode) {
assert (instr1->is_instr_head() && instr2->is_instr_head());
return ( !exists_ip_dep(instr1,instr2) &&
!exists_exception_dep(instr1,instr2) &&
!exists_br_dep(instr1,instr2) &&
!exists_reg_dep(instr1,instr2,fast_mode) &&
!exists_mem_dep(instr1,instr2) &&
!instr1->is_target() && !instr2->is_target()
);
}
inline static bool is_unordered_couple(Instr_IR * instr1, Instr_IR * instr2, int fast_mode) {
assert (instr1->is_instr_head() && instr2->is_instr_head());
return ( !exists_ip_dep(instr1,instr2) &&
!exists_exception_dep(instr1,instr2) &&
!exists_br_dep(instr1,instr2) &&
!exists_reg_dep(instr1,instr2,fast_mode) &&
!exists_mem_dep(instr1,instr2)
);
}
/////////////////////////////////////////////////////////////////////////
// Scheduling a single instruction
/////////////////////////////////////////////////////////////////////////
// Check if instruction fits in a slot
// Return true if success, false if instr does not fit.
// Also find out if we need a stop bit before this slot
bool Merced_Code_Emitter::instr_fits_into_slot(Bundle_IR * bundle, int slot,
Unsch_Instr_IR& instr, unsigned& need_stop)
{
Instr_IR * UNUSED slot_instr = &(bundle->slots[slot]);
assert (slot >=0 && slot < ENC_N_SLOTS);
assert (slot_instr->is_empty());
// Can we place an instruction of a given type into this slot?
unsigned u=unavailable_tmplts[instr.syl_type][NO_STOP][slot];
if ((bundle->avail_tmplts & u) == 0) {
return false;
}
// Do we need a stop before this slot?
need_stop=0;
for (int sl_n=0; sl_n<slot; sl_n++) {
if (!bundle->slots[sl_n].is_empty() &&
need_diff_instr_groups(&(bundle->slots[sl_n]),&instr, fast_reg_dep_check)) {
need_stop = 1;
break;
}
}
if ((slot > 0) && (instr.special_instr & ENC_SI_start_igroup) != 0)
need_stop = 1;
// Can we place a stop bit?
if (need_stop &&
((bundle->avail_tmplts & unavailable_tmplts[instr.syl_type][STOP][slot]) == 0)) {
return false;
}
return true;
}
// Place instruction into a slot
void Merced_Code_Emitter::place_instr_into_slot(Bundle_IR * bundle, int slot,
Unsch_Instr_IR& instr, unsigned need_stop)
{
Instr_IR * slot_instr = &(bundle->slots[slot]);
assert (slot >=0 && slot < ENC_N_SLOTS);
assert (slot_instr->is_empty());
assert (instr.syl_type != ST_br);
copy_ir_into_slot(*slot_instr,instr);
// mark unavailable templates
uint16 u=unavailable_tmplts[instr.syl_type][need_stop][slot];
assert ((bundle->avail_tmplts & u) != 0);
bundle->avail_tmplts &= u;
// copy target information
if (instr.is_target()) {
assert (!(bundle->flags & ENC_BDL_is_target)); // bundle is not a target yet
bundle->flags |= ENC_BDL_is_target;
bundle->target_id=instr.target_id;
}
// handle ST_il
if (instr.syl_type == ST_il) {
assert (slot == 1);
bundle->slots[2].code_image1=instr.code_image2;
bundle->slots[2].filled_tail();
}
// adjust last empty slot information
if (bundle == wbuf_last) {
if (instr.syl_type == ST_il)
slot++;
if (slot >= last_empty_slot) {
last_empty_slot=slot+1;
if (instr.special_instr & ENC_SI_brcall) // no instr after call
last_empty_slot = ENC_N_SLOTS;
}
assert(last_empty_slot <= ENC_N_SLOTS);
}
}
// Try to place an instruction in a current bundle
// Return true if success, false if instruction does not
// fit.
bool Merced_Code_Emitter::place_instr_into_bundle(Bundle_IR * bundle, Unsch_Instr_IR& instr) {
int i;
for (i=last_empty_slot; i<ENC_N_SLOTS; i++) {
unsigned need_stop;
if (instr_fits_into_slot(bundle,i,instr,need_stop)) {
place_instr_into_slot(bundle,i,instr,need_stop);
return true;
}
}
return false;
}
// Schedule an instruction without reordering
void Merced_Code_Emitter::schedule_an_IR_ne (Unsch_Instr_IR& ir) {
bool fitted=false;
if (last_empty_slot < ENC_N_SLOTS)
fitted = place_instr_into_bundle(wbuf_last,ir);
if (!fitted) {
new_bundle();
bool UNUSED result=place_instr_into_bundle(wbuf_last,ir);
assert(result);
}
}
// Schedule an instruction with reordering
void Merced_Code_Emitter::schedule_an_IR_ex (Unsch_Instr_IR& ir) {
// Create a list of empty slots in which the instruction might be put.
// Start the list from *all* empty slots in the last bundle
int n_empty_slots=0;
Instr_IR * sl;
int stop_search=false;
#ifdef _DEBUG
bool started_new_bundle=false;
#endif
if (last_empty_slot == ENC_N_SLOTS) {
new_bundle();
#ifdef _DEBUG
started_new_bundle=true;
#endif
}
Instr_IR * gl_last_empty_slot = wbuf_last->slots + ENC_N_SLOTS - 1;
if (!gl_first_empty_slot)
gl_first_empty_slot=wbuf_last->slots + last_empty_slot;
for (sl=gl_last_empty_slot; !stop_search; sl=decr_slot_ptr(sl)) {
if (sl == gl_first_empty_slot)
stop_search=true;
if (sl->can_be_filled()) {
// add to the list of empty slots
empty_slots[n_empty_slots++]=sl;
}
else if (sl->is_instr_head()){
// check if scheduling ir before this instruction is legal
if (!may_exchange_instr(&ir,sl,fast_reg_dep_check))
break;
else {
}
}
} // for
// Try to fit instruction as early as possible
int i;
for (i=n_empty_slots-1; i>=0; i--) {
sl=empty_slots[i];
int gl_sl_number= (int) (sl - slots);
Bundle_IR * wbuf_curr = wbuf + (gl_sl_number/ENC_N_SLOTS);
int sl_n = gl_sl_number % ENC_N_SLOTS;
unsigned need_stop;
if (instr_fits_into_slot(wbuf_curr, sl_n,ir,need_stop)) {
place_instr_into_slot(wbuf_curr, sl_n, ir, need_stop);
// adjust first empty slot pointer
if (sl == gl_first_empty_slot)
gl_first_empty_slot = (i > 0 ? empty_slots[i-1] : 0);
break;
}
}
// Instruction has been fitted if (i>=0)
if (i<0) {
#ifdef _DEBUG
assert(!started_new_bundle);
#endif
new_bundle();
bool UNUSED result=place_instr_into_bundle(wbuf_last,ir);
assert(result);
}
}
////////////////////////////////////////////////////////////////////////
// Scheduling two instructions in the same bundle
////////////////////////////////////////////////////////////////////////
// Check if two instructions fit in two given slots in the same bundle
// Return true if success, false if instr does not fit.
bool Merced_Code_Emitter::instr_couple_fits_into_slots(Bundle_IR * bundle,
unsigned slot1, unsigned slot2, Unsch_Instr_IR& instr1, Unsch_Instr_IR & instr2,
unsigned& stop_pos)
{
Instr_IR * UNUSED slot_instr1 = &(bundle->slots[slot1]);
Instr_IR * UNUSED slot_instr2 = &(bundle->slots[slot2]);
assert (slot1 >=0 && slot1 < ENC_N_SLOTS && slot2 >=0 && slot2 < ENC_N_SLOTS);
assert (slot_instr1->is_empty() && slot_instr2->is_empty());
// Can we place instructions based on their type?
uint16 avail_tmplts = bundle->avail_tmplts;
uint16 u = unavailable_tmplts[instr1.syl_type][NO_STOP][slot1];
u &= unavailable_tmplts[instr2.syl_type][NO_STOP][slot2];
if ((avail_tmplts & u) == 0) {
return false;
}
// Map instructions to slots
int sl_n;
Instr_IR * new_slots[ENC_N_SLOTS];
for (sl_n=0; sl_n<ENC_N_SLOTS; sl_n++) {
if (bundle->slots[sl_n].is_filled())
new_slots[sl_n]=&(bundle->slots[sl_n]);
else
new_slots[sl_n]=0;
}
new_slots[slot1]=&instr1;
new_slots[slot2]=&instr2;
// Do we need a stop in this bundle?
stop_pos=0; // # of slot after stop, 0 for no stop
for (sl_n=0; sl_n<ENC_N_SLOTS; sl_n++) if (new_slots[sl_n]) {
int sl_n1;
for (sl_n1=sl_n+1; sl_n1<ENC_N_SLOTS; sl_n1++) if (new_slots[sl_n1]) {
if (need_diff_instr_groups(new_slots[sl_n],new_slots[sl_n1], fast_reg_dep_check)) {
if (stop_pos != 0) {
return false;
}
stop_pos = sl_n1;
break;
}
}
}
// Does there exist a template that accomodates both instructions with a given
// stop position?
unsigned need_stop1 = ((slot1 && stop_pos == slot1) ? 1 : 0);
unsigned need_stop2 = ((slot2 && stop_pos == slot2) ? 1 : 0);
u=unavailable_tmplts[instr1.syl_type][need_stop1][slot1];
u &= unavailable_tmplts[instr2.syl_type][need_stop2][slot2];
if ((avail_tmplts & u) == 0) {
#ifdef _DEBUG_ENCODEER
printf(" - rejected based on type/slot combination\n");
#endif
return false;
}
return true;
}
// Try to place two instructions in the bundle.
// Returns pointer to the first used slot if fitteed, 0 otherwise
//
Instr_IR * Merced_Code_Emitter::place_instr_couple_into_bundle(Bundle_IR * bundle,
Unsch_Instr_IR& instr1, Unsch_Instr_IR& instr2, bool unord) {
int i,j;
for (i=0; i<ENC_N_SLOTS; i++) if (bundle->slots[i].can_be_filled()) {
for (j=i+1; j<ENC_N_SLOTS; j++) if (bundle->slots[j].can_be_filled()) {
unsigned stop_pos;
if (instr_couple_fits_into_slots(bundle,i,j,instr1,instr2, stop_pos))
place_instr_couple_into_slots(bundle,i,j,instr1,instr2,stop_pos);
else if (unord && instr_couple_fits_into_slots(bundle, i,j, instr2, instr2, stop_pos))
place_instr_couple_into_slots(bundle,i,j,instr2,instr1, stop_pos);
return &(bundle->slots[i]);
}
}
return 0;
}
// Schedule two instructions in a bundle without reordering
void Merced_Code_Emitter::schedule_two_IR_ne(Unsch_Instr_IR& ir1, Unsch_Instr_IR& ir2, bool unord) {
Instr_IR * fitted=0;
if (last_empty_slot < ENC_N_SLOTS-1) // need two spaces
fitted = place_instr_couple_into_bundle(wbuf_last,ir1,ir2,unord);
if (!fitted) {
new_bundle();
fitted=place_instr_couple_into_bundle(wbuf_last,ir1,ir2,unord);
assert(fitted);
}
}
// Schedule two instructions in a bundle with reordering
void Merced_Code_Emitter::schedule_two_IR_ex(Unsch_Instr_IR & ir1, Unsch_Instr_IR& ir2, bool unord) {
// Create a list of bundles in which the pair of instruction might be put
// Algorithm:
// Scan instruction list and check whether the instructions to be scheduled
// can be moved past the instructions that are already in the buffer.
// Meanwhile, collect the list of the bundles that have two empty slots.
// The scan is over when we checked all the instructions upto the first empty one,
// or encountered an instruction that we cannot jump over.
int n_empty_bdls=0, n_empty_slots=0;
int stop_search=false;
#ifdef _DEBUG
bool started_new_bundle=false;
#endif
if (last_empty_slot == ENC_N_SLOTS) {
new_bundle();
#ifdef _DEBUG
started_new_bundle=true;
#endif
}
// Loop initialization
Instr_IR * gl_last_empty_slot = wbuf_last->slots + ENC_N_SLOTS - 1;
if (!gl_first_empty_slot)
gl_first_empty_slot=wbuf_last->slots + last_empty_slot;
Instr_IR * sl;
int curr_slot=ENC_N_SLOTS - 1;
Bundle_IR * curr_bundle = wbuf_last;
unsigned curr_empty_slots=0;
unsigned move_state=0;
for (sl=gl_last_empty_slot; !stop_search; sl=decr_slot_ptr(sl)) {
if (sl == gl_first_empty_slot)
stop_search=true;
if (sl->can_be_filled()) {
curr_empty_slots++;
empty_slots[n_empty_slots++]=sl;
}
else if (sl->is_instr_head()) {
// check if scheduling instr before this instruction is legal
unsigned may1=may_exchange_instr(&ir1,sl,fast_reg_dep_check);
unsigned may2=may_exchange_instr(&ir2,sl,fast_reg_dep_check);
if (!may1 || !may2) {
stop_search=true;
if (may1 && !may2)
move_state=1;
else if (!may1 && may2)
move_state=2;
}
} // else
// If we just passed a bundle with 2 empty slots - add it to the list
// Note, that it does not matter why we stopped if there are 2 empty slots,
// the only filled one can be slot 0 and we'll try to schedule in slots 1 and 2.
if ((curr_slot == 0 || stop_search) && curr_empty_slots >=2) {
empty_bdls[n_empty_bdls++]=curr_bundle;
move_state=0;
}
// If we stopped because we could not move one of the instructions and
// there is one empty slot after it - add bundle to the list
if (stop_search && curr_empty_slots == 1 &&
(move_state==1 || (move_state==2 && unord)))
empty_bdls[n_empty_bdls++]=curr_bundle;
// update variables for the next iteration
if (!stop_search) {
if (curr_slot == 0) {
curr_slot = ENC_N_SLOTS-1;
curr_bundle = decr_wbuf_ptr(curr_bundle);
curr_empty_slots=0;
}
else
curr_slot--;
}
} // for
// Try to fit instructions as early as possible
int ib=n_empty_bdls-1;
Instr_IR * first_filled_slot=0;
// Process last bundle specially if we couldn't move one of the instructions
if (move_state == 1)
first_filled_slot = place_instr_couple_into_bundle(empty_bdls[ib--],ir1,ir2,false);
else if (move_state == 2) {
assert(unord);
first_filled_slot = place_instr_couple_into_bundle(empty_bdls[ib--],ir2,ir1,false);
}
while (!first_filled_slot && ib>=0)
first_filled_slot = place_instr_couple_into_bundle(empty_bdls[ib--],ir1,ir2,unord);
if (!first_filled_slot) { // instrs did not fit so far
#ifdef _DEBUG
assert(!started_new_bundle);
#endif
new_bundle();
first_filled_slot=place_instr_couple_into_bundle(wbuf_last,ir1,ir2,unord);
assert(first_filled_slot);
}
// Adjust gl_first_empty_slot pointer
if (first_filled_slot == gl_first_empty_slot)
gl_first_empty_slot= (n_empty_slots >= 3 ? empty_slots[n_empty_slots-3] : 0);
}
// Schedule an instruction
void Merced_Code_Emitter::schedule_an_IR (Unsch_Instr_IR& ir) {
static Unsch_Instr_IR prev_ir;
static Enc_All_Reg_BV read_regs, written_regs;
static bool prev_dep_check_fast;
#ifdef _DEBUG
emit_after_get_code_size=true;
#endif
switch (coupled_instr_state) {
case ENC_single_instr: // normal instruction
wbuf_is_empty=false;
if (exch_instr)
schedule_an_IR_ex(ir);
else
schedule_an_IR_ne(ir);
break;
case ENC_first_coupled_instr: // 1st instr in a pair
if (!fast_reg_dep_check) {
prev_ir.read_regs.slow=&read_regs;
prev_ir.written_regs.slow=&written_regs;
}
copy_unscheduled_ir(prev_ir, ir);
coupled_instr_state = ENC_second_coupled_instr;
prev_dep_check_fast=fast_reg_dep_check;
break;
case ENC_second_coupled_instr: // 2nd instr in a pair
wbuf_is_empty=false;
assert (!curr_instr_couple_is_unordered ||
is_unordered_couple(&prev_ir, &ir, fast_reg_dep_check));
if (prev_dep_check_fast != fast_reg_dep_check) {
assert(!fast_reg_dep_check);
switch_ir_to_slow_reg_dep_check(prev_ir, &read_regs, &written_regs);
}
if (exch_instr)
schedule_two_IR_ex(prev_ir, ir, curr_instr_couple_is_unordered);
else
schedule_two_IR_ne(prev_ir, ir, curr_instr_couple_is_unordered);
coupled_instr_state = ENC_single_instr;
break;
default: DIE(("Unknown instruction state"));
}
}
///////////////////////////////////////////////////////////////////////////
// code patching
///////////////////////////////////////////////////////////////////////////
void Branch_Patch::apply(char * code_buf, uint64* target_offset_tbl) {
unsigned imm21=(unsigned)((target_offset_tbl[patch_target_id] - offset) >> 4);
// 21.5.1.1 (B1) - imm_20 at 13:32, s at 36
uint64 imm21_fmt= (0xFFFFF & imm21) | ((0x100000 & imm21)<<3);
uint64 * u=0;
char * b =0;
switch (slot) {
case 0: // 0:23 -> 18:41
u=(uint64 *)(code_buf+offset);
*u |= imm21_fmt << 18;
break;
case 1: // 0:23 -> 59:82
b=code_buf+offset+7;
*b |= (char)((imm21_fmt & 0x1f)<<3);
u=(uint64 *)(code_buf+offset+8);
*u |= imm21_fmt >> 5;
break;
case 2: // 0:23 -> 100:123
u=(uint64*)(code_buf+offset+8);
*u |= imm21_fmt <<36;
break;
default: DIE(("Unexpected slot"));
}
}
void Movl_Patch::apply(char * code_buf, uint64* target_offset_tbl) {
assert (slot == 1); // movl is always in 1st slot
uint64 tgt=(uint64)(code_buf+target_offset_tbl[patch_target_id]);
// Bundle layout computed from 21.8 - X2 and 21.7.2
// Word 1:
// Bits Content
// 0-4 template
// 5-45 slot0
// 46-63 18 least significant bits of imm_41
// Word 2:
// 0-22 23 most significant bits of imm_41
// 23-28 qp
// 29-35 r_1
// 36-42 imm_7b
// 43 v_c
// 44 i_c
// 45-49 imm_5c
// 50-58 imm_9d
// 59 i
// 60-63 6
// 64-bit word 1
uint64* w64 = (uint64*)(code_buf+offset);
// 18 least sign. bits of imm_41 at 46
*w64 |= ((tgt & 0x000000ffffc00000) << 24); // << 46, >> 22
// 64-bit word 2
w64++;
// 23 most significant bits of imm_41 starting at 0
*w64 |= ((tgt & 0x7fffff0000000000) >> 40); // >>18, >> 22
// imm_7b at 36
*w64 |= ((tgt & 0x000000000000007f) << 36); // >>0, <<36
// i_c at 44
*w64 |= ((tgt & 0x0000000000200000) << 23); // >>21, <<44
// imm_5c at 45
*w64 |= ((tgt & 0x00000000001f0000) << 29); // >>16, <<45
// imm_9d at 50
*w64 |= ((tgt & 0x000000000000ff80) << 43); // >>7, <<50
// i at 59
*w64 |= ((tgt & 0x8000000000000000) >> 4); // >>63, <<59
}
void Switch_Patch::apply(char * code_buffer, uint64* target_offset_tbl) {
*entry_addr=(uint64)(code_buffer+target_offset_tbl[switch_target_id]);
}
///////////////////////////////////////////////////////////////////////////
// Emit code from work buffer
///////////////////////////////////////////////////////////////////////////
// find template for a given bundle
static void find_template(Bundle_IR * bundle_ir) {
int i;
for (i=0;i<ENC_N_TMPLT; i++) {
if (bundle_ir->avail_tmplts & (1 << i))
break;
}
assert (i<ENC_N_TMPLT && "No template is available");
bundle_ir->tmplt_number=(EM_Templates)i;
}
// check for end of group between two bundles
static int does_need_stop_between_bundles(Bundle_IR * b1, Bundle_IR * b2, int b1_first_slot,
int b2_n_slots, bool fast_mode) {
for (int i=b1_first_slot; i<ENC_N_SLOTS; i++) {
Instr_IR * instr1=b1->slots + i;
if (instr1->is_instr_head()) {
if (instr1->ends_inst_group())
return true;
for (int j=0; j<b2_n_slots; j++) {
Instr_IR * instr2=b2->slots + j;
if (instr2->is_instr_head() &&
(need_diff_instr_groups(instr1,instr2,fast_mode) || instr2->starts_inst_group()))
return true;
}
}
}
return false;
}
// prepass the bundles to be emitted:
// - find the templates
// - place stop bits
#define MURKA_TRACE 0
void Merced_Code_Emitter::prepass_before_emit(Bundle_IR * first, Bundle_IR * last) {
#if MURKA_TRACE
static FILE * trace_file=0;
static int count = 0;
if (count == 0)
trace_file = fopen ("__trace","w");
count++;
#endif
// find templates
Bundle_IR * p;
for (p=first; p!=last; p=incr_wbuf_ptr(p))
find_template(p);
// place stop bits
Bundle_IR * p_next=0, * group_start=first, * last_processed=decr_wbuf_ptr(last);
int stop1 = tmplt_descr[group_start->tmplt_number].stop_down;
for (p=first; p!=last_processed; p=p_next) {
p->flags &= ~ENC_BDL_needs_stop;
p_next=incr_wbuf_ptr(p);
int stop2=tmplt_descr[p_next->tmplt_number].stop_up;
bool placed_stop=false;
for (Bundle_IR * q=group_start; q!=p_next; q=incr_wbuf_ptr(q)) {
if (does_need_stop_between_bundles(q,p_next,stop1,stop2,fast_reg_dep_check)) {
p->flags |= ENC_BDL_needs_stop;
placed_stop=true;
break;
}
stop1=0;
}
if (placed_stop || tmplt_descr[p_next->tmplt_number].stop_down != 0) {
group_start = p_next;
stop1 = tmplt_descr[p_next->tmplt_number].stop_down;
}
}
last_processed->flags |= ENC_BDL_needs_stop;
}
// emit bundle
void Merced_Code_Emitter::emit_bundle(Bundle_IR * bundle_ir) {
// assert that only first non-empty slot can be a target
assert ((!bundle_ir->slots[1].is_target() || bundle_ir->slots[0].is_empty()) &&
(!bundle_ir->slots[2].is_target() ||
(bundle_ir->slots[0].is_empty() && bundle_ir->slots[1].is_empty())));
// set an offset in a target table
if (bundle_ir->flags & ENC_BDL_is_target) {
uint64 tgt_id=bundle_ir->target_id;
uint64 prev_tgt_id;
do {
prev_tgt_id=target_offset[tgt_id];
target_offset[tgt_id]=curr_offset;
#ifdef _DEBUG
target_offset_is_set[tgt_id]=true;
#endif
tgt_id=prev_tgt_id;
} while (tgt_id != ENC_NOT_A_TARGET);
}
// add patches for current bundle instructions to the patch list
for (int sl_n=0; sl_n<ENC_N_SLOTS; sl_n++) {
if (bundle_ir->slots[sl_n].needs_patching()) {
// two kinds of patches: branches (ST_b) and movl (ST_il)
unsigned patch_target_id=bundle_ir->slots[sl_n].patch_target_id;
switch(bundle_ir->slots[sl_n].syl_type) {
case ST_b:
case ST_bl:
patches=new(mem_pool) Branch_Patch(patches,curr_offset,sl_n,patch_target_id);
break;
case ST_il:
patches=new(mem_pool) Movl_Patch(patches,curr_offset,sl_n, patch_target_id);
break;
default:
DIE(("Unexpected syllable type"));
}
}
}
// prepare space in code buffer
curr_offset+=IPF_INSTRUCTION_LEN;
int i;
unsigned char * cur_bundle = (unsigned char *) _alloc_space(IPF_INSTRUCTION_LEN);
reset_bundle(cur_bundle);
// set the template descriptor
assert(bundle_ir->tmplt_number >=0 && bundle_ir->tmplt_number < ENC_N_TMPLT);
EM_Templates tmplt_number = bundle_ir->tmplt_number;
Template_Descr *td=&(tmplt_descr[tmplt_number]);
// emit the code
encode_template(cur_bundle,tmplt_number,bundle_ir->flags & ENC_BDL_needs_stop);
for (i=0;i<ENC_N_SLOTS; i++) {
Instr_IR * instr=&(bundle_ir->slots[i]);
if (instr->is_empty())
encode_slot(cur_bundle, typed_nop(td->syl_type[i]),i);
else
encode_slot(cur_bundle, instr->code_image1,i);
}
}
// emit several bundles from a working buffer
void Merced_Code_Emitter::emit_several_bundles(Bundle_IR * first, Bundle_IR * last) {
prepass_before_emit(first,last);
for (Bundle_IR * p=first; p!=last; p=incr_wbuf_ptr(p))
emit_bundle(p);
}
// emit all bundles from a working buffer
void Merced_Code_Emitter::emit_all() {
if (!wbuf_is_empty) {
if (wbuf_last->is_not_empty())
new_bundle_with_no_emit();
emit_several_bundles(wbuf_first, wbuf_last);
wbuf_first=wbuf_last=wbuf;
wbuf_last->init(fast_reg_dep_check);
wbuf_is_empty=true;
gl_first_empty_slot=0;
}
}
// move to new bundle assuming that there's space
void Merced_Code_Emitter::new_bundle_with_no_emit() {
wbuf_last=incr_wbuf_ptr(wbuf_last);
assert (wbuf_last != wbuf_first);
wbuf_last->init(fast_reg_dep_check);
last_empty_slot=0;
#ifdef ENABLE_ENCODER_STATS
n_new_bundles++;
#endif
}
// Handle buffer overflow
void Merced_Code_Emitter::buffer_overflow() {
Bundle_IR * last=incr_wbuf_ptr_by(wbuf_first,ENC_WBUF_LEN/2);
emit_several_bundles(wbuf_first,last);
wbuf_first=last;
// If working in an exchange instr mode find first empty slot
gl_first_empty_slot=0;
if (exch_instr) {
Instr_IR * gl_last_empty_slot = wbuf_last->slots + last_empty_slot;
if (gl_last_empty_slot == slot_end) {
assert (last_empty_slot == ENC_N_SLOTS);
gl_last_empty_slot = wbuf->slots;
}
for (Instr_IR * sl=last->slots; sl != gl_last_empty_slot; sl=incr_slot_ptr(sl)) {
if (sl->is_empty() && !sl->should_stay_empty()) {
gl_first_empty_slot=sl;
break;
}
}
}
}
// Start a new bundle.
// There always should be one empty bundle in wbuf.
// If our new bundle is the last one, emit half of the buffer.
void Merced_Code_Emitter::new_bundle() {
if (incr_wbuf_ptr(incr_wbuf_ptr(wbuf_last)) == wbuf_first)
buffer_overflow();
new_bundle_with_no_emit();
}
////////////////////////////////////////////////////////////////////////////////
// class Merced_Code_Emitter_GC1
////////////////////////////////////////////////////////////////////////////////
void Merced_Code_Emitter_GC1::place_instr_into_slot(Bundle_IR * bundle, int slot,
Unsch_Instr_IR& instr, unsigned need_stop) {
Merced_Code_Emitter::place_instr_into_slot(bundle,slot,instr,need_stop);
// Track reference definitions
unsigned is_call=instr.is_call();
uint64 offset = 0;
if (instr.def_ref != ENC_REF_dontcare || instr.is_call())
offset = curr_offset + ((bundle - wbuf_first)*ENC_N_SLOTS + slot)*IPF_INSTRUCTION_LEN;
if (is_call) {
#ifndef _NDEBUG
unscheduled_call=false;
#endif
call_site_def_ref.clear_all();
if (offset < gc_point) {
for (unsigned i=0;i<gc_point_def_ref.get_size(); i++) {
if (curr_cleared_refs.is_clear(i))
gc_point_def_ref.clear(i);
}
}
}
unsigned dest_ref=instr.dest_ref;
if (instr.def_ref == ENC_REF_set) {
call_site_def_ref.set(dest_ref,ENC_RI_ref);
// check for interior pointer
unsigned base_pos=instr.base_pos;
if (base_pos != ENC_NOT_A_BASE && call_site_def_ref.is_ref(base_pos)) {
call_site_def_ref.add(base_pos, ENC_RI_base);
if (instr.ref_info & ENC_RI_copy)
call_site_def_ref.copy(dest_ref,base_pos);
else
call_site_def_ref.add(dest_ref, (instr.ref_info | (uint64)base_pos));
}
if (offset < gc_point) {
assert(base_pos == ENC_NOT_A_BASE ||
(call_site_def_ref.is_ref(base_pos) == gc_point_def_ref.is_ref(base_pos)));
gc_point_def_ref.set(dest_ref,call_site_def_ref.get(dest_ref));
if (gc_point_def_ref.is_intr_ptr(dest_ref))
gc_point_def_ref.add(base_pos, ENC_RI_base);
}
}
else if (instr.def_ref == ENC_REF_reset) {
call_site_def_ref.clear(dest_ref);
if (offset < gc_point)
gc_point_def_ref.clear(dest_ref);
}
}
void Merced_Code_Emitter_GC1::emit_bundle(Bundle_IR * bundle) {
Merced_Code_Emitter::emit_bundle(bundle);
if (curr_offset >= gc_point)
done_upto_GC = true;
}
////////////////////////////////////////////////////////////////////////////////
// class Merced_Code_Emitter_GC2
////////////////////////////////////////////////////////////////////////////////
Merced_Code_Emitter_GC2::~Merced_Code_Emitter_GC2() {
apr_allocator_free(allocator, ref_bit_arena0);
}
void Merced_Code_Emitter_GC2::_alloc_ref_bit_arena(unsigned size) {
assert(ref_bit_arena0==NULL);
ref_bit_arena0 = ref_bit_arena = apr_allocator_alloc(allocator, size);
n_ref_bit = 0;
}
void Merced_Code_Emitter_GC2::copy_ref_bits(char *ref_bit_buffer) {
if (n_ref_bit) {
assert(ref_bit_buffer);
char *rp = ref_bit_buffer;
for(apr_memnode_t *ap = ref_bit_arena0; ap != NULL; ap = ap->next) {
char * mem_begin = (char *)ap + APR_MEMNODE_T_SIZE;
for(char *p = mem_begin; p < ap->first_avail; ) {
*rp++ = *p++;
}
}
}
_free_arena(ref_bit_arena0);
}
void Merced_Code_Emitter_GC2::emit_bundle(Bundle_IR * bundle) {
Instr_IR * instr=bundle->slots;
for (int i=0;i<ENC_N_SLOTS; i++,instr++) {
if (instr->def_ref != ENC_REF_dontcare)
encode_def_ref_bit(instr->def_ref);
}
Merced_Code_Emitter::emit_bundle(bundle);
}
void Merced_Code_Emitter_GC2::encode_def_ref_bit(U_8 def_ref) {
unsigned shift = n_ref_bit++ % 8;
if (shift==0) {
if (ref_bit_arena->first_avail >= ref_bit_arena->endp) {
assert(ref_bit_arena->first_avail == ref_bit_arena->endp);
assert(!ref_bit_arena->next);
// allocate new node of default size
ref_bit_arena->next = apr_allocator_alloc(allocator, 0);
ref_bit_arena = ref_bit_arena->next;
assert(ref_bit_arena->next == NULL);
assert(ref_bit_arena->first_avail < ref_bit_arena->endp);
}
*(ref_bit_arena->first_avail++) = ENC_REF_dontcare;
}
if (def_ref==ENC_REF_set) {
*(ref_bit_arena->first_avail - 1) |= (1<<shift);
} else
assert(def_ref==ENC_REF_reset);
}
////////////////////////////////////////////////////// begin brl patching
MCE_brl_patch_list::MCE_brl_patch_list()
{
patches = 0;
} //MCE_brl_patch_list::MCE_brl_patch_list
void MCE_brl_patch_list::add_patch(MCE_brl_patch *patch)
{
patch->set_next(patches);
patches = patch;
} //MCE_brl_patch_list::add_patch
MCE_brl_patch::MCE_brl_patch(uint64 br_target, uint64 br_offset)
{
next = 0;
target = br_target;
offset = br_offset;
} //MCE_brl_patch::MCE_brl_patch
void MCE_brl_patch::set_next(MCE_brl_patch *next_patch)
{
next = next_patch;
} //MCE_brl_patch::set_next
uint64 MCE_brl_patch::get_target()
{
return target;
} //MCE_brl_patch::get_target
uint64 MCE_brl_patch::get_offset()
{
return offset;
} //MCE_brl_patch::get_offset
MCE_brl_patch *MCE_brl_patch::get_next()
{
return next;
} //MCE_brl_patch::get_next
#ifdef DUMP_IPF_INSTRUCTIONS
static int get_bit(uint64 *p, int bit_num)
{
int word_num = bit_num / 64;
int bit = bit_num % 64;
uint64 mask = ((uint64)1) << bit;
if(mask & p[word_num]) {
return 1;
} else {
return 0;
}
} //get_bit
static void dump_ipf_instr(void *addr)
{
uint64 *p = (uint64 *)addr;
for(int i = 127; i >= 0; i--) {
printf("%d", get_bit(p, i));
}
printf("\n");
} //dump_ipf_instr
static void print_header()
{
int i;
for(i = 127; i >= 0; i--) {
printf("%d", (i % 10));
}
printf("\n");
for(i = 127; i >= 0; i--) {
if((i % 10) == 0) {
printf("%d", ((i / 10) % 10));
} else {
printf(" ");
}
}
printf("\n");
printf(" ***** tmpl\n");
printf(" ***************************************** instr 0\n");
printf(" ***************************************** instr 1\n");
printf("***************************************** instr 2\n");
printf("\n");
printf("\n");
} //print_header
static void dump_bytes(U_8* addr, int num_bytes)
{
for(int i = 0; i < num_bytes; i++) {
printf("%2d: %2x\n", i, addr[i]);
}
} //dump_bytes
void dump_ipf_instructions(void* addr, int num_instr)
{
print_header();
for(int i = 0; i < num_instr; i++) {
dump_ipf_instr((void *)(((U_8*)addr) + (16 * i)));
}
dump_bytes((U_8*)addr, 16 * num_instr);
} //dump_ipf_instructions
#endif //DUMP_IPF_INSTRUCTIONS
void set_bit(uint64 *p, bool value, int bit_num)
{
int word_num = bit_num / 64;
int bit = bit_num % 64;
uint64 mask = ((uint64)1) << bit;
if(value) {
p[word_num] |= mask;
} else {
p[word_num] &= ~mask;
}
} //set_bit
// Set len bits at position pos with the value in the memory stream pointed by p.
void set_bits(uint64 *p, uint64 value, int pos, int len)
{
for(int i = 0; i < len; i++) {
uint64 mask = ((uint64)1) << i;
bool bit_value = (value & mask) != 0;
set_bit(p, bit_value, pos + i);
}
} //set_bits
void Merced_Code_Emitter::apply_brl_patch(MCE_brl_patch *patch, char *code_buffer)
{
uint64 target = patch->get_target();
uint64 offset = patch->get_offset();
U_8* curr_ip = (((U_8*)code_buffer) + offset);
#ifdef DUMP_IPF_INSTRUCTIONS
printf("Merced_Code_Emitter::apply_brl_patch: code_buffer=%p, offset=%p (curr_ip=%p), target=%p\n",
code_buffer, offset, curr_ip, target);
dump_ipf_instructions(curr_ip, 1);
#endif //DUMP_IPF_INSTRUCTIONS
uint64 imm64 = target - (uint64)curr_ip;
uint64 temp = imm64;
uint64 i;
uint64 imm39;
uint64 imm20b;
temp >>= 4; // ignore 4 least significant bits
imm20b = temp & 0xfFfFf;
temp >>= 20;
imm39 = temp & 0x7fFFFFffff;
temp >>= 39;
i = temp & 1;
temp >>= 1;
assert(temp == 0);
int offset_instr1 = 46;
int offset_instr2 = 87;
set_bits((uint64 *)curr_ip, i, offset_instr2 + 36, 1);
set_bits((uint64 *)curr_ip, imm20b, offset_instr2 + 13, 20);
set_bits((uint64 *)curr_ip, imm39, offset_instr1 + 2, 39);
#ifdef DUMP_IPF_INSTRUCTIONS
printf("imm64=%p, imm20b=%p, imm39=%p, i=%p\n", imm64, imm20b, imm39, i);
dump_ipf_instructions(curr_ip, 1);
#endif //DUMP_IPF_INSTRUCTIONS
} //Merced_Code_Emitter::apply_brl_patch
void Merced_Code_Emitter::apply_brl_patches(char *code_buffer)
{
for(MCE_brl_patch *patch = brl_patches->get_patch_list(); patch; patch = patch->get_next()) {
apply_brl_patch(patch, code_buffer);
}
} //Merced_Code_Emitter::apply_brl_patches
//////////////////////////////////////////////////////// end brl patching