blob: 1a1ba2cad04837bb6eb0c1de0d7af11c4506c19c [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
* @author Alexander Astapchuk
#include "compiler.h"
#include "arith_rt.h"
#include "trace.h"
#include "stats.h"
#ifdef WIN32
#include <malloc.h>
#include <stdlib.h>
#include <stdarg.h>
#include <algorithm>
using std::min;
#include <open/vm.h>
#include <open/vm_ee.h>
#include <open/vm_class_info.h>
#include <open/vm_class_loading.h>
#include <jit_runtime_support.h>
#include <jit_intf.h>
#include <jni_types.h>
#include "port_threadunsafe.h"
* @file
* @brief Common CodeGen routines and data.
namespace Jitrino {
namespace Jet {
// Shared call signatures used when generating calls to runtime helpers.
// Each one is built from a calling convention followed by a list of jtypes.
// NOTE(review): presumably the first jtype is the return type and the
// remaining ones are the argument types - confirm against CallSig's ctor.
const CallSig ci_helper_o(CCONV_HELPERS, jvoid, jobj);
const CallSig ci_helper_v(CCONV_HELPERS, jvoid);
const CallSig ci_helper_oi(CCONV_HELPERS, jobj, jobj, i32);
// The only signature here that uses CCONV_MANAGED instead of CCONV_HELPERS.
const CallSig ci_helper_lazy(CCONV_MANAGED, jvoid, jobj, jobj);
const CallSig ci_helper_linkerr(CCONV_HELPERS, jvoid, jobj, i32, i32);
/**
 * Emits the instruction(s) needed to copy src_s into dst_s.
 *
 * Three cases are distinguished:
 *  - memory to memory: the value travels through a register obtained from
 *    valloc();
 *  - floating-point immediate: the bits are written to memory as 32-bit
 *    integer immediates (one store for flt32, two for the other case) and,
 *    when the destination is a register, loaded from the scratch area;
 *  - anything else: a single mov().
 *
 * @param dst_s         destination item
 * @param src_s         source item
 * @param skipTypeCheck when true, the assert that both items have the same
 *                      jtmov() type is not enforced
 */
void CodeGen::do_mov(const Val& dst_s, const Val& src_s, bool skipTypeCheck)
// Both types are passed through jtmov() before being compared and used.
jtype sjt = jtmov(src_s.jt());
jtype djt = jtmov(dst_s.jt());
assert(skipTypeCheck || sjt == djt);
if (dst_s.is_mem() && src_s.is_mem()) {
// Need to allocate a reg
Opnd src(sjt, src_s.base(), src_s.disp(), src_s.index(), src_s.scale());
Opnd dst(djt, dst_s.base(), dst_s.disp(), dst_s.index(), dst_s.scale());
AR ar = valloc(src.jt());
Opnd reg(src.jt(), ar);
// mem -> reg -> mem
mov(reg, src);
mov(dst, reg);
if (src_s.is_imm() && (is_f(src_s.jt()))) {
// store to memory [first, then upload to register]
Opnd dst;
if (dst_s.is_mem()) {
// The destination memory is addressed as i32 so that the raw bits can be
// stored with integer moves.
dst = dst_s.as_opnd(i32);
else {
// 'mov (flt32/dbl64) fr, imm' - store to scratch area first,
// then upload to register
dst = Opnd(i32, m_base, voff(m_stack.scratch()));
if (src_s.jt() == flt32) {
float f = src_s.fval();
// NOTE(review): reinterpreting the float through an int* relies on the
// compiler tolerating this kind of type punning - confirm build flags.
Opnd val(*(int*)&f);
mov(dst, val);
else {
double d = src_s.dval();
// First 32-bit half of the double, taken at the lower address ...
Opnd val_lo(*(int*)&d);
mov(dst, val_lo);
// ... and the second half, stored 4 bytes further.
Opnd dst_hi(i32, dst.base(), dst.disp()+4, dst.index(),
Opnd val_hi(*(1+(int*)&d));
mov(dst_hi, val_hi);
// Successfully stored the immediate to memory. Now, if dst_s is a register,
// upload the value to it
if (dst_s.is_reg()) {
Opnd scratch = Opnd(sjt, m_base, voff(m_stack.scratch()));
mov(dst_s.as_opnd(), scratch);
// General case: a plain move between the two operands.
mov(dst_s.as_opnd(djt), src_s.as_opnd(sjt));
/**
 * Generates a null check for the operand stack item at the given depth.
 *
 * Forwards to gen_check_null(Val&, bool) with hw_ok set to true.
 *
 * @param depth depth of the item, as understood by vstack()
 */
void CodeGen::gen_check_null(unsigned depth)
Val& obj = vstack(depth);
gen_check_null(obj, true);
/**
 * Generates a null check for the object reference described by obj.
 *
 * - An item that already has VA_NZ only produces a trace line.
 * - An immediate equal to NULL_REF produces a throw of
 *   NULL_POINTER_EXCEPTION via gen_throw(npeClass, false).
 * - Otherwise the method decides whether a hardware-raised NPE can be relied
 *   upon (useHW). If not, an explicit compare plus a conditional branch
 *   around gen_throw(npeClass, true) is emitted.
 * Afterwards the frame's stack items and local variables are scanned for
 * entries equal to obj (per the comments, to propagate the 'non-null'
 * attribute to them).
 *
 * @param obj   item to check; its type must be jobj (asserted)
 * @param hw_ok whether the caller permits relying on a hardware check
 */
void CodeGen::gen_check_null(Val& obj, bool hw_ok)
assert(obj.jt() == jobj);
// Already known to be non-null - only trace it.
if (obj.has(VA_NZ)) {
if (is_set(DBG_TRACE_CG)) {
dbg(";;>check.npe for %s - skipped\n",
if (obj.is_imm()) {
// A constant null reference: the outcome is known at compile time.
if (obj.pval() == NULL_REF) {
//gen_args(const CallSig& cs, unsigned idx, const Val * parg0 = NULL,
// const Val * parg1 = NULL, const Val * parg2 = NULL);
Class_Handle npeClass = vm_lookup_class_with_bootstrap(NULL_POINTER_EXCEPTION);
gen_throw(npeClass, false);
// Try to guess whether we can use hardware NPE at this point.
// First, check if the method may use it at all:
// No catch handler in the method - any exception exits the method
// !synchronized => to avoid the following:
// synchronized method throws HW NPE
// GC starts during handling in VM
// rt_enum() detects that the exception comes from HW and reports
// nothing => 'this' gets no references and is killed by GC
// after the GC, VM learns that the exception comes from inside the synch
// method. VM tries to release monitor => asks for
// jit_get_address_of_this, but this address was already collected by
// GC as garbage - everyone's dead.
// Also, we can not use HW checks for compressed references - the
// uncompressed null reference is not zero.
bool useHW = hw_ok && get_bool_arg("hwnpe", true) && !g_refs_squeeze;
if (meth_is_sync_inst() || meth_num_handlers() != 0) {
useHW = false;
// Now, all variables must be as if we were entering a catch handler -
// the locals on callee-save registers must be in those registers and
// other locals must be in the memory
if (useHW) {
unsigned vars=m_infoBlock.get_num_locals();
for (unsigned i=0; i<vars; i++){
const Val& var = m_jframe->var(i);
AR ar = vreg(var.jt(), i);
if (ar == ar_x) continue; // no global allocation - skip it
if (is_callee_save(ar)) {
if (!var.is_reg()) {
// spilled callee-save globally allocated register.
// hmmm... how come? anyway, can't use HW check
useHW = false;
else {
assert(var.reg() == ar);
else {
// Scratch register holds a variable - can't use HW check
if (!var.is_mem()) {
useHW = false;
if (useHW) {
// Current check is based on the presumption that the first 64K of
// address space raises hardware NPE. However, an object may occupy
// more than 64K - e.g. for array of longs, the (8192+1) items
// reside in more than 64K. Need to check whether 64k+ also raises
// the NPE, or change the NPE checks for field and array accesses.
if (is_set(DBG_TRACE_CG)) {
dbg(";;>check.npe.HW for %s\n", to_str(obj.as_opnd()).c_str());
else {
// Software check: compare the reference with null and branch around the
// throw when it is not null.
if (is_set(DBG_TRACE_CG)) {
dbg(";;>check.npe for %s\n", to_str(obj.as_opnd()).c_str());
Opnd opnd = obj.as_opnd();
if (g_refs_squeeze) {
//AR gr_null = rfind(Val(jobj, NULL_REF));
//if (gr_null == ar_x) {
// gr_null = valloc(jobj);
// movp(gr_null, NULL_REF);
// rset(gr_null, Val(jobj, NULL_REF));
// With squeezed references NULL_REF is loaded into a register and the
// operand is compared against that register.
AR gr_null = valloc(jobj);
movp(gr_null, NULL_REF);
alu(alu_cmp, opnd, gr_null);
else {
assert(NULL_REF == 0);
if (opnd.is_reg()) {
// test reg, reg sets the flags for a comparison with zero.
alu(alu_test, opnd, opnd);
} else {
// There is no imm64 for cmp. But it is not a problem as we compare with zero.
alu(alu_cmp, opnd, Opnd((int)(int_ptr)NULL_REF));
// Branch target is unknown yet; it is patched below to the code that
// follows the throw.
unsigned br_off = br(ne, 0, 0, taken);
Class_Handle npeClass = vm_lookup_class_with_bootstrap(NULL_POINTER_EXCEPTION);
gen_throw(npeClass, true);
patch(br_off, ip());
} // if !useHW
// Mark the object 'non-null, guaranteed'
// Propagate 'non-null' attribute to the same items in the frame
// NOTE(review): the stack scan starts at depth 1, not 0 - confirm that
// depth 0 is intentionally excluded.
for (unsigned i=1; i<m_jframe->size(); i++) {
Val& that = m_jframe->dip(i);
if (that == obj) {
for (unsigned i=0; i<m_infoBlock.get_num_locals(); i++) {
Val& var = m_jframe->var(i);
if (var == obj) {
if (is_set(DBG_TRACE_CG) && !useHW) { dbg(";;>~check.npe\n"); }
/**
 * Generates an array bounds check: compares the array's length field with
 * the index and throws INDEX_OUT_OF_BOUNDS unless 'length > index' holds
 * as an unsigned comparison.
 *
 * @param ref_depth   operand stack depth of the array reference
 * @param index_depth operand stack depth of the index
 */
void CodeGen::gen_check_bounds(unsigned ref_depth, unsigned index_depth)
if (is_set(DBG_TRACE_CG)) {dbg(";;>check.bounds\n");}
// NOTE(review): arr.reg() is used below, so the 'true' passed to vstack()
// presumably forces the reference into a register - confirm.
const Opnd arr = vstack(ref_depth, true).as_opnd();
const Opnd idx = vstack(index_depth, vis_mem(index_depth)).as_opnd();
// Memory operand addressing the length at rt_array_length_offset from the
// array reference.
Opnd len(i32, arr.reg(), rt_array_length_offset);
if (idx.is_reg()) {
alu(alu_cmp, len, Opnd(i32, idx.reg()));
else {
// Index is not in a register here - compare against its immediate value.
alu(alu_cmp, len, Opnd(idx.ival()));
// Unsigned condition here - aka 'len > (unsigned)index' - this also
// covers 'index < 0' - in a single comparison.
unsigned br_off = br(above, 0, 0, taken);
//gen_call_vm_restore(true, ci_helper_v, rt_helper_throw_out_of_bounds, 0);
Class_Handle ioobClass = vm_lookup_class_with_bootstrap(INDEX_OUT_OF_BOUNDS);
gen_throw(ioobClass, true);
// The in-bounds branch lands right after the throw sequence.
patch(br_off, ip());
if (is_set(DBG_TRACE_CG)) {dbg(";;>~check.bounds\n");}
/**
 * Generates a check that the divisor on the operand stack is not zero and a
 * throw of DIVIDE_BY_ZERO_EXCEPTION for the zero case.
 *
 * Constant divisors are resolved while generating code: a known non-zero
 * value (or an item with VA_NZ) needs no check, a known zero produces a
 * throw. For types that are_big() the value occupies two stack items and
 * both parts are examined.
 *
 * @param jt            type of the division
 * @param divizor_depth operand stack depth of the divisor (of its first
 *                      item when the value takes two items)
 */
void CodeGen::gen_check_div_by_zero(jtype jt, unsigned divizor_depth)
const Val& s = vstack(divizor_depth);
if ((s.is_imm() && s.ival() != 0) || s.has(VA_NZ)) {
// not zero - guaranteed
if (is_set(DBG_TRACE_CG)) {dbg(";;>check.div_by_zero\n");}
// The first Val is immediate and zero
if (s.is_imm() && s.ival() == 0) {
// if it's i32, then nothing to do more - throw exception ...
if (jt == i32) {
// IS zero. Why do people want to divide by zero explicitly?..
//gen_call_throw(ci_helper_v, rt_helper_throw_div_by_zero_exc, 0);
Class_Handle aeClass = vm_lookup_class_with_bootstrap(DIVIDE_BY_ZERO_EXCEPTION);
gen_throw(aeClass, false);
if (is_set(DBG_TRACE_CG)) {dbg(";;>~check.div_by_zero\n");}
else if (is_big(jt)) {
// ... otherwise check high part of long constant ...
const Val& shi = m_jframe->dip(divizor_depth+1);
if (shi.is_imm() && shi.ival() == 0) {
// ... yes, it's zero too - throw ...
// Why do people want to divide by zero explicitly?..
//gen_call_throw(ci_helper_v, rt_helper_throw_div_by_zero_exc, 0);
Class_Handle aeClass = vm_lookup_class_with_bootstrap(DIVIDE_BY_ZERO_EXCEPTION);
gen_throw(aeClass, false);
if (is_set(DBG_TRACE_CG)) {dbg(";;>~check.div_by_zero\n");}
else if (shi.is_imm() && shi.ival() != 0) {
// ... no, the high part is not zero - may return.
if (is_set(DBG_TRACE_CG)) {dbg(";;>~check.div_by_zero\n");}
// fall through to the next checks
// Long value on the stack and we are on 64bit platform - check
// a single constant at once
if (s.is_imm() && jt == i64 && !is_big(i64)) {
if (s.lval() == 0) {
// IS zero. Why do people want to divide by zero explicitly?..
//gen_call_throw(ci_helper_v, rt_helper_throw_div_by_zero_exc, 0);
Class_Handle aeClass = vm_lookup_class_with_bootstrap(DIVIDE_BY_ZERO_EXCEPTION);
gen_throw(aeClass, false);
if (is_set(DBG_TRACE_CG)) {dbg(";;>~check.div_by_zero\n");}
// at this point:
assert(!s.is_imm() || (s.is_imm() && s.ival()==0 && jt == i64));
// .. and 'shi' is not immediate
assert(jt != i64 || !vis_imm(divizor_depth+1));
// Set the flags from a comparison of the (first part of the) divisor
// with zero.
if (s.is_reg()) {
Opnd reg = s.as_opnd(jtmov(jt));
alu(alu_test, reg, reg);
else {
Opnd mem = s.as_opnd(jtmov(jt));
alu(alu_cmp, mem, Opnd(0));
// Single-item value: one conditional branch around the throw is enough.
if (jt == i32 || !is_big(jt)) {
unsigned br_off = br(nz, 0, 0, taken);
//gen_call_vm_restore(true, ci_helper_v, rt_helper_throw_div_by_zero_exc, 0);
Class_Handle aeClass = vm_lookup_class_with_bootstrap(DIVIDE_BY_ZERO_EXCEPTION);
gen_throw(aeClass, true);
patch(br_off, ip());
if (is_set(DBG_TRACE_CG)) {dbg(";;>~check.div_by_zero\n");}
// Two-item value: the second item has to be tested as well.
unsigned br_off = NOTHING;
const Val& shi = m_jframe->dip(divizor_depth+1);
if (!s.is_imm()) {
// jump around the further check --> [1]
br_off = br(nz, 0, 0, hint_none);
// NB: the code generated below (till if(is_set)) may be jumped over
// so no methods should be invoked that may change location of an item
// - like vstack(), vswap() etc. If it's necessary to invoke such
// methods, this should be done before the br() above.
if (shi.is_reg()) {
Opnd reg = shi.as_opnd(i32);
alu(alu_test, reg, reg);
else {
Opnd mem = shi.as_opnd(i32);
alu(alu_cmp, mem, Opnd(0));
unsigned br_hi = br(nz, 0, 0, taken);
//gen_call_vm_restore(true, ci_helper_v, rt_helper_throw_div_by_zero_exc, 0);
Class_Handle aeClass = vm_lookup_class_with_bootstrap(DIVIDE_BY_ZERO_EXCEPTION);
gen_throw(aeClass, true);
patch(br_hi, ip());
if (!s.is_imm()) {
// [1] --> connect to here
patch(br_off, ip());
if (is_set(DBG_TRACE_CG)) {dbg(";;>~check.div_by_zero\n");}
// NOTE(review): the body of this method is not visible here; judging by the
// name it presumably emits a breakpoint instruction - confirm.
void CodeGen::gen_brk(void)
void CodeGen::gen_gc_stack(int depth /*=-1*/, bool trackIt /*=false*/)
if (depth == -1) {
depth = m_jframe->size();
// prepare GC info for stack
// Store the current depth
if (m_bbstate->stack_depth == (unsigned)depth) {
if (is_set(DBG_TRACE_CG)) {
dbg(";;>GC.stack.depth - skipped (%d)\n", depth);
else {
Opnd op_depth(i32, m_base, voff(m_stack.info_gc_stack_depth()));
mov(op_depth, depth);
if (trackIt) {
m_bbstate->stack_depth = depth;
if (depth == 0) {
unsigned n_words = words(depth);
if (n_words != 0) {
unsigned gc_word = 0;
unsigned size = min(m_jframe->size(), WORD_SIZE);
for (unsigned i=0; i<size; i++) {
const Val& s = m_jframe->at(i);
if (s.jt() != jobj) continue;
if (vvar_idx(s) != -1) continue;
if (s.is_reg() && is_callee_save(s.reg())) continue;
if (s.survive_calls()) continue;
gc_word |= 1<<i;
// check whether we do need to store first word
if (m_bbstate->stack_mask_valid &&
gc_word == m_bbstate->stack_mask) {
// do not need to update the GC mask, it's the same
if (is_set(DBG_TRACE_CG)) {
dbg(";;>GC.stack.mask - skipped(0x%X)\n", gc_word);
else {
Opnd op_mask(i32, m_base, 0*sizeof(int)+ voff(m_stack.info_gc_stack()));
mov(op_mask, gc_word);
if (trackIt) {
m_bbstate->stack_mask = gc_word;
m_bbstate->stack_mask_valid = true;
// store the bit masks
unsigned size = m_jframe->size();
for (unsigned i = 1; i < n_words; i++) {
unsigned pos = i*WORD_SIZE; // where to start
unsigned end_pos = min(pos + WORD_SIZE, size);
unsigned gc_word = 0;
for ( ; pos < end_pos; pos++) {
const Val& s = m_jframe->at(pos);
if (s.jt() != jobj) continue;
if (vvar_idx(s) != -1) continue;
if (s.is_reg() && is_callee_save(s.reg())) continue;
if (s.survive_calls()) continue;
gc_word |= 1<<pos;
Opnd op_mask(i32, m_base, i*sizeof(int)+ voff(m_stack.info_gc_stack()));
mov(op_mask, gc_word);
/**
 * Emits code that sets or clears the GC bit describing local variable idx,
 * depending on whether the value being stored there is an object.
 *
 * The bit lives in one of three areas of the frame: the callee-save register
 * mask, the input arguments mask or the locals mask. For wide types the bit
 * of the following slot is cleared as well.
 *
 * @param jt  type of the value the local now holds; the bit is set when it
 *            is jobj and cleared otherwise
 * @param idx index of the local variable
 */
void CodeGen::gen_gc_mark_local(jtype jt, unsigned idx)
jtype jtvar = vtype(idx);
if (jtvar != jvoid && jtvar != jobj) {
// the variable is known to never contain an object.
assert(jt != jobj);
if (m_infoBlock.get_flags() & DBG_TRACE_CG) {
dbg(";;> skipping GC mark - the item is known to be non-object\n");
// 'mark' selects between setting (object) and clearing (non-object) the bit.
bool mark = jt == jobj;
Val& v = vlocal(jt, idx);
// If an item was already marked and its type is still the same, then
// skip the mark
if ((v.has(VA_MARKED)) &&
((v.type() == jobj && mark) || (v.type() != jobj && !mark))) {
if (is_set(DBG_TRACE_CG)) {
dbg(";;>GC mark skipped - object type is known\n");
// prepare GC info for variable
// 'offset' is the frame offset of the mask word and 'bitno' the bit in it.
unsigned offset;
unsigned bitno;
AR ar = vreg(jt, idx);
if (ar != ar_x && is_callee_save(ar)) {
// mark the callee-save register
offset = m_stack.info_gc_regs();
bitno = bit_no(ar_idx(ar));
else if (vis_arg(idx)) {
// mark input argument
unsigned i = vget_arg(idx);
// The argument is expected to be passed in memory, not in a register.
assert(m_ci.reg(i) == ar_x);
assert(0 ==;
int inVal =;
unsigned word = word_no(inVal);
offset = word*sizeof(int) + m_stack.info_gc_args();
bitno = bit_no(inVal);
else {
// mark the Val in the stack frame
unsigned word = word_no(idx);
offset = word*sizeof(int) + m_stack.info_gc_locals();
bitno = bit_no(idx);
unsigned mask = 1<<bitno;
if (is_wide(jt)) {
// We must also clear bit for the next Val
if (bitno < 31) {
// good. can do in one touch
mask |= 1 << (bitno+1);
else {
// Bad, the next Val crosses the word's boundary
// Clear bit 0 of the following mask word with a separate instruction.
unsigned offset2 = offset + sizeof(int);
unsigned mask2 = 1;
const Opnd opnd2(i32, m_base, voff(offset2));
alu(alu_and, opnd2, Opnd(~mask2));
const Opnd opnd(i32, m_base, voff(offset));
if (mark) {
// Object: set the bit(s).
alu(alu_or, opnd, Opnd(mask));
else {
// Not an object: clear the bit(s).
alu(alu_and, opnd, Opnd(~mask));
/**
 * Emits code that moves items from the operand stack into the argument
 * locations (registers or sp-relative memory) described by cs.
 *
 * Arguments are processed from the last one (idx+cnt-1) down to idx, while
 * the operand stack is walked from its top.
 *
 * @param pop whether the items are consumed from the operand stack; when
 *            false a running depth is advanced instead
 * @param cs  call signature describing where each argument goes
 * @param idx index of the first argument in cs to fill
 * @param cnt number of arguments to fill; -1 means 'all from idx to the end'
 * @return 0 on the regular path; the disabled fast path below would return
 *         the amount added to sp
 */
unsigned CodeGen::gen_stack_to_args(bool pop, const CallSig& cs,
unsigned idx, int cnt)
assert(idx <= cs.count());
unsigned num = cs.count() - idx;
if (cnt == -1) {
cnt = num;
// A special case on IA32 - our frame layout fits best for the managed
// calling convention, and the args can be prepared in a few
// instructions. - later
// TODO: it's not only for 'MANAGED_IA32' but for 'l2r && callee pops
// && !align stack'.
// The leading 'false &&' keeps this fast path switched off.
if (false && pop && idx == 0 && cnt == (int)cs.count() &&
int fix = 0;
if (cnt != 0) {
// find the difference between last used slot and the end of
// stack frame
int s = m_stack.stack_slot(m_jframe->depth2slot(0));
fix = m_stack.size() + s; // s is < 0
for (unsigned i=0; i<(unsigned)cnt; i++) {
jtype jt = m_jframe->top();
if (is_big(jt)) {
if (fix != 0) {
alu(alu_add, sp, fix);
return fix;
// Regular path: reserve the stack area for the arguments when starting
// from the first argument.
if (idx == 0 && cs.size() != 0) {
alu(alu_sub, sp, cs.size());
int depth = 0;
// 1st pass - free all registers that are used for args passing
for (int i=0; !( & CCONV_MEM) && i<cnt; i++) {
unsigned arg_id = idx+cnt-i-1;
AR ar = cs.reg(arg_id);
if (ar == ar_x) continue;
// 2nd pass - move every argument to its register or memory location.
for (int i=0; i<cnt; i++) {
unsigned arg_id = idx+cnt-i-1;
jtype jt = cs.jt(arg_id);
// Types that order below i32 are passed as i32.
if (jt<i32) {
jt = i32;
const Val& s = m_jframe->dip(depth);
if (cs.reg(arg_id) != ar_x) {
Opnd rarg(jt, cs.reg(arg_id));
do_mov(rarg, s);
else {
jtype jtm = jtmov(jt);
Opnd arg(jtm, sp,;
do_mov(arg, s);
if (is_big(jt)) {
// Two-item value: the second item is taken from the next stack depth.
Opnd arg_hi(jtm, sp,;
const Val& s_hi = m_jframe->dip(depth+1);
do_mov(arg_hi, s_hi);
if (pop) {
else {
// Items stay on the stack - step over one or two slots.
depth += is_wide(jt) ? 2 : 1;
return 0;
/**
 * Generates a call to a helper that is expected to throw and not return.
 *
 * The GC stack info is first set to depth 0 without tracking, then the call
 * is emitted through call_va() with the variadic arguments.
 *
 * @param cs     call signature of the target
 * @param target address of the helper to call
 * @param idx    forwarded to call_va(); the variadic arguments follow it
 */
void CodeGen::gen_call_throw(const CallSig& cs, void * target,
unsigned idx, ...)
// say 'stack is empty'
gen_gc_stack(0, false);
va_list valist;
// NOTE(review): no matching va_end() is visible here - confirm one follows
// the call_va() invocation.
va_start(valist, idx);
// Register handed to call_va() together with the target.
AR gr = valloc(jobj);
call_va(is_set(DBG_CHECK_STACK), gr, target, cs, idx, valist);
#ifdef _DEBUG
// just to make sure we do not return from there
/**
 * Generates a call to a VM helper.
 *
 * The GC stack info for the current stack is stored first (with tracking),
 * then the call is emitted through call_va() with the variadic arguments.
 *
 * @param cs     call signature of the target
 * @param target address of the helper to call
 * @param idx    forwarded to call_va(); the variadic arguments follow it
 */
void CodeGen::gen_call_vm(const CallSig& cs, void * target,
unsigned idx, ...)
gen_gc_stack(-1, true);
va_list valist;
// NOTE(review): no matching va_end() is visible here - confirm one follows
// the call_va() invocation.
va_start(valist, idx);
AR gr = valloc(jobj);
call_va(is_set(DBG_CHECK_STACK), gr, target, cs, idx, valist);
/**
 * Generates a call through call_va() without storing any GC stack info
 * beforehand (unlike gen_call_vm()).
 *
 * @param cs     call signature of the target
 * @param target address of the code to call
 * @param idx    forwarded to call_va(); the variadic arguments follow it
 */
void CodeGen::gen_call_novm(const CallSig& cs, void * target,
unsigned idx, ...)
va_list valist;
// NOTE(review): no matching va_end() is visible here - confirm one follows
// the call_va() invocation.
va_start(valist, idx);
AR gr = valloc(jobj);
call_va(is_set(DBG_CHECK_STACK), gr, target, cs, idx, valist);
/**
 * Generates a call to a VM helper and afterwards restores the code
 * generator's basic block state to what it was before the call.
 *
 * Non-callee-save registers that are in use are written to their spill
 * slots before the call and read back after it, unless exc is true.
 *
 * @param exc    when true, the scratch registers are neither saved nor
 *               restored around the call
 * @param cs     call signature of the target
 * @param target address of the helper to call
 * @param idx    forwarded to call_va(); the variadic arguments follow it
 */
void CodeGen::gen_call_vm_restore(bool exc, const CallSig& cs,
void * target, unsigned idx, ...)
// Snapshot of the state, put back after the call.
BBState saveBB = *m_bbstate;
// 1. store scratch registers in a secret place
// 2. park everything
// 3. call whatever
// 4. restore scratch regs from the secret place
// 5. restore the state for callee-save registers
// 1.
bool saveScratch = !exc;
for (unsigned i=0; i<ar_num; i++) {
AR ar = _ar(i);
if (is_callee_save(ar)) continue;
if (saveScratch && rrefs(ar) != 0) {
// Referenced scratch register: write it to its spill slot, as dbl64 for
// floating-point registers and as jobj otherwise.
jtype jt = is_f(ar) ? dbl64 : jobj;
Opnd mem(jt, m_base, voff(m_stack.spill(ar)));
Opnd reg(jt, ar);
mov(mem, reg);
if (rlocks(ar) != 0) {
runlock(ar, true);
// 2.
gen_gc_stack(-1, true);
// 3.
va_list valist;
// NOTE(review): no matching va_end() is visible here - confirm one follows
// the call_va() invocation.
va_start(valist, idx);
AR gr = valloc(jobj);
call_va(is_set(DBG_CHECK_STACK), gr, target, cs, idx, valist);
// 4.
// Restore BBState first, so ref_counts for registers become valid
*m_bbstate = saveBB;
// restore the registers state
for (unsigned i=0; saveScratch && i<ar_num; i++) {
AR ar = _ar(i);
if (is_callee_save(ar)) continue;
if (rrefs(ar) != 0) {
// Mirror of the save loop above: reload from the spill slot.
jtype jt = is_f(ar) ? dbl64 : jobj;
Opnd mem(jt, m_base, voff(m_stack.spill(ar)));
Opnd reg(jt, ar);
mov(reg, mem);
// 5.
// Actually nothing to do here.
// If we had a local var on register before, then it's still on the reg
// If we had the var with static assignment which was in memory, before,
// then the memory was not corrupted.
// So, just nothing to do with callee-save regs
/**
 * Generates code that throws an exception of the given class.
 *
 * In the 'lazy' mode a single call to rt_helper_throw_lazy is generated
 * with the class handle. Otherwise the exception object is allocated with
 * rt_helper_new, its constructor is invoked and rt_helper_throw is called.
 *
 * @param exnClass class of the exception to throw
 * @param restore  when true, the basic block state is saved before and
 *                 restored after generating the throw sequence
 */
void CodeGen::gen_throw(Class_Handle exnClass, bool restore)
// 'lazy' has two alternative definitions selected by HYX86_64.
#ifdef HYX86_64
bool lazy = false;
bool lazy = true;
BBState saveBB;
//TODO: Workaround for x86-64 stack should be aligned to half of 16
#ifdef HYX86_64
alu(alu_sub, sp, (unsigned)STACK_SLOT_SIZE);
alu(alu_and, sp, ~((unsigned)STACK_SLOT_SIZE));
alu(alu_add, sp, (unsigned)STACK_SLOT_SIZE);
if (restore){
saveBB = *m_bbstate;
// Release every register lock before generating the calls.
for (unsigned i=0; i<ar_num; i++) {
AR ar = _ar(i);
if (rlocks(ar) != 0) {
runlock(ar, true);
if (lazy) {
gen_call_throw(ci_helper_lazy, rt_helper_throw_lazy, 0, exnClass, NULL);
} else {
// Eager path: allocate the object, run its constructor, then throw it.
static const CallSig ci_new(CCONV_HELPERS, jobj, i32, jobj);
unsigned size = (unsigned)class_get_object_size(exnClass);
unsigned stackFix;
Allocation_Handle ah = class_get_allocation_handle(exnClass);
gen_call_vm(ci_new, rt_helper_new, 0, size, ah);
static const CallSig cs_constructor(CCONV_MANAGED, jvoid, jobj);
// NOTE(review): these are function-local statics initialised from
// exnClass, so they are computed only on the first call - confirm that a
// later call with a different exception class gets the right constructor.
static Method_Handle constructorMethDesc =
class_lookup_method_recursively(exnClass, DEFAUlT_COSTRUCTOR_NAME,
static char* constructorMethAddr =
stackFix = gen_stack_to_args(false, cs_constructor, 0);
gen_call_vm(cs_constructor, constructorMethAddr, 1);
// Undo whatever gen_stack_to_args() reported as added to sp.
if (stackFix != 0) {
alu(alu_sub, sp, stackFix);
static const CallSig cs_throw(CCONV_HELPERS, jvoid, jobj);
stackFix = gen_stack_to_args(true, cs_throw, 0);
gen_call_vm(cs_throw, rt_helper_throw, 1);
if (stackFix != 0) {
alu(alu_sub, sp, stackFix);
// Restore BBState first, so ref_counts for registers become valid
if (restore){
*m_bbstate = saveBB;
}}; // ~namespace Jitrino::Jet