/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Alexander Astapchuk
*/
/**
* @file
* @brief CodeGen's routines for method invocation and prolog/epilogue
* generation.
*/
#include "compiler.h"
#include "trace.h"
#include <open/vm.h>
#include <jit_import.h>
//#include <jit_intf.h>
#include "open/vm_ee.h"
#if !defined(_IPF_)
#include "enc_ia32.h"
#endif
#include <algorithm>
using std::min;
namespace Jitrino {
namespace Jet {
/**
* CallSig for monitor_enter and monitor_exit helpers.
*/
static const CallSig cs_mon(CCONV_HELPERS, jvoid, jobj);
/**
* CallSig for Class* to java.lang.Class helpers.
*/
static const CallSig cs_jlc(CCONV_HELPERS, jobj, jobj);
void Compiler::gen_prolog(void) {
if (is_set(DBG_TRACE_CG)) {
dbg(";; ========================================================\n");
dbg(";; Prolog: max_stack=%d, num_locals=%d, in_slots=%d\n",
m_stack.get_max_stack(),
m_stack.get_num_locals(),
m_stack.get_in_slots());
dbg(";; info_gc_stack_depth=%d, info_gc_locals=%d, info_gc_stack=%d",
m_stack.info_gc_stack_depth(),
m_stack.info_gc_locals(),
m_stack.info_gc_stack());
dbg(";; stack_bot=%d, stack_max=%d\n",
m_stack.stack_bot(), m_stack.stack_max());
dbg(";; local(0)=%d\n", m_stack.local(0));
dbg(";; native_stack_bot=%d\n", m_stack.unused());
dbg(";; ========================================================\n");
}
unsigned prologStart = ipoff();
//
// Debugging things
//
// Ensure stack is aligned properly.
unsigned alignment = (m_ci.cc() & CCONV_STACK_ALIGN_HALF16) ? CCONV_STACK_ALIGN16
: m_ci.cc() & CCONV_STACK_ALIGN_MASK;
if (is_set(DBG_CHECK_STACK) && alignment != 0) {
if (m_ci.cc() & CCONV_STACK_ALIGN_HALF16) {
alu(alu_sub, sp, (unsigned)STACK_SLOT_SIZE);
}
alu(alu_test, sp, (alignment - 1));
unsigned br_off = br(eq, 0, 0);
gen_dbg_rt(false, "Misaligned stack @ %s", meth_fname());
gen_brk();
patch(br_off, ip());
if (m_ci.cc() & CCONV_STACK_ALIGN_HALF16) {
alu(alu_add, sp, (unsigned)STACK_SLOT_SIZE);
}
}
if (is_set(DBG_BRK)) {
gen_brk();
}
if (m_infoBlock.get_bc_size() == 1 && m_bc[0] == OPCODE_RETURN && !g_jvmtiMode) {
// empty method, nothing to do; the same is in gen_return();
return;
}
// A special stack preparation is performed in order to deal with
// a stack overflow error (SOE) at runtime:
// First, the callee-save registers are not changed until we are
// absolutely sure we have enough stack. In this case, if SOE happens,
// we'll simply do nothing in unwind_stack().
//
// Allocate stack frame at the very beginning, so we are always in
// the predictable state in unwind_frame().
//
unsigned frameSize = m_stack.size();
alu(alu_sub, sp, frameSize);
// Lock all the argument registers to prevent them from being overwritten
// by the frame setup code below.
rlock(m_ci);
{
// A rare case, but it still needs to be handled:
// When we allocate a stack frame larger than one page, the extra
// memory page(s) may not be accessible or even allocated yet.
// A direct access to such a [non-existent] page raises an access
// violation. To avoid this we simply probe (read) the pages
// sequentially; in response to a read of an inaccessible page the OS
// grows the stack, so the pages become accessible.
const unsigned PAGE_SIZE = 0x1000;
unsigned pages =
(frameSize + m_max_native_stack_depth +
PAGE_SIZE -1)/PAGE_SIZE;
if (method_is_synchronized(m_method) || hasSOEHandlers) {
//A contract with the VM: probe an extra page for synchronized methods or methods with SOE handlers.
pages++;
}
//
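// Each iteration below emits, roughly, a dummy 4-byte load
//     mov reg, [sp + frameSize - i*PAGE_SIZE]
// which touches one page of the newly reserved area so the OS commits it.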
for (unsigned i=1; i<pages; i++) {
AR ar = valloc(i32);
ld4(ar, sp, frameSize-i*PAGE_SIZE);
}
}
// When requested, store the whole context (i.e. including scratch
// registers) - normally for JVMTI PopFrame support.
// Scratch registers are stored separately from the callee-save ones:
// the callee-save registers go into the spill area, but we can't save
// scratch regs there - that area is already used to temporarily save
// scratch regs during method calls, etc. (see gen_vm_call_restore).
// Thus, we dedicate a separate place for them.
const bool storeWholeContext =
m_infoBlock.get_compile_params().exe_restore_context_after_unwind;
#ifdef _DEBUG
// Fill the whole stack frame with a special value.
// The '- 1' avoids erasing the return address slot.
int num_words = frameSize/STACK_SLOT_SIZE- 1;
AR ar = valloc(iplatf);
Opnd fill(iplatf, ar);
rlock(ar);
AR ridx = valloc(iplatf);
runlock(ar);
Opnd idx(iplatf, ridx);
//
// When filling up the frame, the regs context is destroyed - preserve
// it.
if (storeWholeContext) {
push(fill);
push(idx);
}
//
#ifdef _EM64T_
mov(fill, (uint_ptr)0xDEADBEEFDEADBEEF);
#else
mov(fill, 0xDEADBEEF);
#endif
mov(idx, num_words);
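// The fill loop emitted below is roughly:
//   loop: [sp + idx*STACK_SLOT_SIZE] = fill; idx -= 1; jnz loop
// It walks down from just below the return address slot to
// sp + STACK_SLOT_SIZE, leaving retAddr and [sp+0] untouched.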
unsigned _loop = ipoff();
mov(Opnd(iplatf, sp, 0, ridx, STACK_SLOT_SIZE), fill);
alu(alu_sub, idx, 1);
unsigned br_off = br(nz, 0, 0);
patch(br_off, ip(_loop));
if (storeWholeContext) {
pop(idx);
pop(fill);
}
#endif
// Save callee-save registers. If the frame size is less than one page,
// that page has not been touched yet, so an SOE may happen here.
for (unsigned i=0; i<ar_num; i++) {
AR ar = _ar(i);
if (ar==sp || !is_callee_save(ar) || !m_global_rusage.test(i)) {
continue;
}
// use maximum possible size to store the register
jtype jt = is_f(ar) ? dbl64 : jobj;
// Here, always use sp-based addressing - bp frame is not ready
// yet.
st(jt, ar, sp, frameSize+m_stack.spill(ar));
m_infoBlock.saved(ar);
}
if (storeWholeContext) {
// For JVMTI's PopFrame we store all scratch registers to a special
// place.
if (is_set(DBG_TRACE_CG)) { dbg(";;>jvmti.save.all.regs\n"); }
for (unsigned i=0; i<ar_num; i++) {
AR ar = _ar(i);
if (is_callee_save(ar) || ar==sp) {
continue;
}
// use maximum possible size to store the register
jtype jt = is_f(ar) ? dbl64 : jobj;
// Here, always use sp-based addressing - bp frame is not ready
// yet.
st(jt, ar, sp, frameSize+m_stack.jvmti_register_spill_offset(ar));
}
if (is_set(DBG_TRACE_CG)) { dbg(";;>~jvmti.save.all.regs\n"); }
}
// If we reach this point at runtime, then we have enough stack and all
// the needed registers are saved, so in case of unwind_stack() we'll
// simply restore the registers from the stack.
unsigned thisPoint = ipoff() - prologStart;
m_infoBlock.set_warmup_len(thisPoint);
if (m_base != sp) {
// create bp-frame
lea(m_base, Opnd(jobj, sp, frameSize));
}
// Must be here, after the stack gets aligned
if (is_set(DBG_TRACE_EE)) {
gen_dbg_rt(true, "entering: %s", meth_fname());
}
//
// reload input args into local vars
//
::std::vector<unsigned> locals_map;
locals_map.resize(words(m_ci.count()));
// an initial GC map for input args
::std::vector<unsigned> args_map;
args_map.resize(words(m_ci.count()));
// an initial GC map for callee-save registers
unsigned regs_map = 0;
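// regs_map: bit ar_idx(reg) is set when a callee-save register allocated
// to an object local holds a reference; filled in the GC-map loop below.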
// the extra STACK_SLOT_SIZE accounts for the return address (retAddr)
unsigned const sp_offset = frameSize + STACK_SLOT_SIZE;
// Spill out registers that are both input args and globally allocated
for (unsigned i=0, local=0; i<m_ci.count(); i++, local++) {
jtype jt = m_ci.jt(i);
AR ar = m_ci.reg(i);
if (ar != ar_x && m_global_rusage.test(ar_idx(ar))) {
Opnd arg = m_ci.get(i);
Val& var = vlocal(jt, local, true);
mov(var.as_opnd(), arg);
// A presumption, to simplify the code: if the managed calling
// convention passes args in registers, then it's a platform without
// the 'big' type problem.
assert(!is_big(jt));
}
if (is_wide(jt)) {
++local;
}
}
// Now, process input args:
// - set GC maps for objects that came as input args,
// - move input args into slots in the local stack frame (for some args)
for (unsigned i = 0, local=0; i<m_ci.count(); i++, local++) {
jtype jt = m_ci.jt(i);
// All values less than 32 bits get moved between methods as I_32
if (jt<i32) {
jt = i32;
}
// If this is an object, then set a bit in appropriate map ...
if (jt == jobj) {
AR ar = vreg(jobj, local);
if (ar != ar_x && is_callee_save(ar)) {
// .. callee-saved GP regs or ..
regs_map |= 1<<ar_idx(m_ra[local]);
}
else if (vis_arg(local)) {
// .. local vars that are kept in the input slots, or input args
// that must stay valid during enumeration (e.g. for JVMTI
// PopFrame) ...
assert(m_ci.reg(i) == ar_x);
assert(0 == (m_ci.off(i) % STACK_SLOT_SIZE));
int inVal = m_ci.off(i) / STACK_SLOT_SIZE;
args_map[word_no(inVal)] =
args_map[word_no(inVal)] | (1 <<bit_no(inVal));
if (g_jvmtiMode) {
// .. a 'regular' GC map for locals - must report
// together with input args in case of JVMTI
locals_map[word_no(local)] =
locals_map[word_no(local)] | (1 <<bit_no(local));
}
}
else {
assert(m_ci.reg(i) != ar_x);
// .. a 'regular' GC map for locals.
locals_map[word_no(local)] =
locals_map[word_no(local)] | (1 <<bit_no(local));
}
}
jtype jtm = jtmov(jt);
// as_type() => Convert narrow types (<i32) to i32.
Opnd arg = m_ci.get(i, sp_offset).as_type(jt);
// If 'this' needs to be stored for special reporting (monitor_exit
// or stack traces) - store it.
if (i==0 && is_set(JMF_REPORT_THIS)) {
if (is_set(DBG_TRACE_CG)) {dbg(";;>copy thiz\n");}
assert(jt == jobj);
Opnd thiz(jobj, m_base, voff(m_stack.thiz()));
do_mov(thiz, arg);
if (is_set(DBG_TRACE_CG)) {dbg(";;>~copy thizh\n");}
}
// If the local resides in its input arg slot, there is no need to copy
// it from the input arg into the frame, except in JVMTI mode.
if (vis_arg(local) && !g_jvmtiMode) {
if (is_wide(jt)) {
++local;
}
continue;
}
//
// Ok, copy from the input args area into the local variables area
//
// Define the slot, so it has proper type
vvar_def(jt, local);
if (arg.is_reg() && m_global_rusage.test(ar_idx(arg.reg()))) {
// See a loop above - the argument already spilled into memory,
// nothing to do
}
else {
// forDef = true to avoid uploading, so it only returns a memory
// operand
Val& var = vlocal(jt, local, true);
do_mov(var, arg);
if (is_big(jt)) {
// Presumption: on IA32 (where is_big()==true) no i64 input args
// are left in the input args area
assert(!vis_arg(local+1));
// Presumption: on IA32 (where is_big()==true) no i64 input args
// are passed in registers
assert(arg.is_mem());
assert(arg.index() == ar_x);
Val arg_hi(jtm, arg.base(), arg.disp()+4);
Val var_hi = vlocal(jt, local+1, true);
do_mov(var_hi, arg_hi);
}
}
if (is_wide(jt)) {
++local;
}
}
runlock(m_ci);
//
// Store the GC map for the local variables that are initialized as
// they come from input args
//
if (is_set(DBG_TRACE_CG) && locals_map.size() != 0) {dbg(";;>locals.gc_map\n");}
for (unsigned i = 0; i<locals_map.size(); i++) {
Opnd map(i32, m_base, voff(m_stack.info_gc_locals()+i*sizeof(int)));
Opnd val(locals_map[i]);
mov(map, val);
}
//
// For other local variables, zero the GC map
//
unsigned locals_gc_size = words(m_infoBlock.get_num_locals());
if (locals_gc_size != locals_map.size()) {
if (is_set(DBG_TRACE_CG)) {dbg(";;>locals.gc_map\n");}
Opnd reg(i32, valloc(i32));
alu(alu_xor, reg, reg);
for (unsigned i=(U_32)locals_map.size(); i<locals_gc_size; i++) {
st4(reg.reg(), m_base, voff(m_stack.info_gc_locals()+i*sizeof(int)));
}
}
//
// Store the GC map for input args
//
if (is_set(DBG_TRACE_CG) && args_map.size() != 0) {dbg(";;>args.gc_map\n");}
for (unsigned i = 0; i<args_map.size(); i++) {
Opnd map(i32, m_base, voff(m_stack.info_gc_args()+i*sizeof(int)));
Opnd val(args_map[i]);
mov(map, val);
}
//
// Store info about objects on registers
//
if (is_set(DBG_TRACE_CG)) {dbg(";;>regs.gc_map\n");}
Opnd map(i32, m_base, voff(m_stack.info_gc_regs()));
Opnd val(regs_map);
mov(map, val);
//
// Initial operand stack depth is zero
//
if (is_set(DBG_TRACE_CG)) {dbg(";;>gc.stack_depth\n");}
Opnd dpth(i32, m_base, voff(m_stack.info_gc_stack_depth()));
mov(dpth, Opnd(0));
m_bbstate->stack_depth = 0;
// Put the variables into their proper places, in case a call to a
// JVMTI/monitor_enter/recompilation helper leads to GC.
// TODO: may optimize a bit by specifying (0) if the 0th BB has
// ref_count==1. In that case there is no real need to load all the
// items into their registers. This would require special processing in
// both bb_enter() and bb_leave()
gen_bb_leave(NOTHING);
//
// now everything is ready, we may call the VM/whatever
//
// Debugging - print out 'Entering ...'
if (is_set(DBG_TRACE_EE)) {
if (is_set(DBG_TRACE_CG)) {dbg(";;>print.ee\n");}
rlock(cs_trace_arg);
// Print out input args
for (unsigned i=0, local=0; i<m_ci.count(); i++, local++) {
// prepare stack
if(cs_trace_arg.size() != 0) {
alu(alu_sub, sp, cs_trace_arg.size());
}
// 'local'-th argument as a first arg for dbg_trace_arg() ...
jtype jt = m_ci.jt(i);
if (jt<i32) jt = i32;
Opnd arg = cs_trace_arg.get(0);
Val var;
if (vreg(jt, local) != ar_x) {
AR ar = vreg(jt, local);
if (is_f(ar) && arg.is_reg()) {
// If the local var resides in a floating-point register and the
// calling convention uses registers to pass args, we cannot simply
// do 'mov gr, fr'. Store fr to memory first, then reload it
// into gr.
assert(is_gr(arg.reg()));
Opnd scratch(jt, m_base, voff(m_stack.scratch()));
mov(scratch, Opnd(jt, ar));
jt = jt == flt32 ? i32 : i64;
var = scratch.as_type(jt);
}
else {
var = Val(jt, ar);
}
}
else {
var = Val(jt, m_base, vlocal_off(local));
}
do_mov(arg, var.as_opnd(arg.jt()));
// ... its type and index ...
gen_call_novm(cs_trace_arg, (void*)&dbg_trace_arg, 1, i, jt);
if (is_wide(jt)) {
++local;
}
}
runlock(cs_trace_arg);
if (is_set(DBG_TRACE_CG)) {dbg(";;>~print.ee\n");}
}
//
// Profiling/recompilation support
//
if (is_set(JMF_PROF_ENTRY_BE)) {
if (is_set(DBG_TRACE_CG)) { dbg(";;>profiling\n"); }
// Increment entry counter
AR ar = valloc(jobj);
movp(ar, m_p_methentry_counter);
Opnd addr(i32, ar, 0);
if (is_set(JMF_PROF_SYNC_CHECK)) {
rlock(ar);
AR gr_val = valloc(i32);
runlock(ar);
Opnd val(i32, gr_val);
Opnd thr(m_methentry_threshold);
/* mov vreg, [counter] */ mov(val, addr);
/* add vreg, 1 */ alu(alu_add, val, Opnd(1));
/* mov [counter], vreg */ mov(addr, val);
/* cmp vreg, threshold */ alu(alu_cmp, val, thr);
/* jne keep_going */
/* call recompile */
/* keep_going: ... */
unsigned br_off = br(ne, 0, 0, taken);
gen_call_vm_restore(false, ci_helper_o, m_recomp_handler_ptr,
0, m_profile_handle);
patch(br_off, ip());
}
else {
alu(alu_add, addr, Opnd(1));
}
if (is_set(DBG_TRACE_CG)) { dbg(";;>~profiling\n"); }
}
//
// JVMTI method_enter notification
//
if (compilation_params.exe_notify_method_entry) {
AR ar = valloc(iplatf);
Opnd flag_addr(iplatf, ar);
mov(flag_addr,Opnd(iplatf,(int_ptr)rt_method_entry_flag_address));
Opnd mem(i16, ar, 0);
alu(alu_cmp, mem, Opnd(0));
unsigned br_off = br(z, 0, 0, taken);
SYNC_FIRST(static const CallSig cs_ti_menter(CCONV_HELPERS, jvoid, jobj));
gen_call_vm(cs_ti_menter, rt_helper_ti_method_enter, 0, m_method);
patch(br_off, ip());
}
if (meth_is_sync()) {
unsigned stackFix = 0;
if (is_set(DBG_TRACE_CG)) { dbg(";;>monitor_enter\n"); }
if (method_is_static(m_method)) {
gen_call_vm(cs_jlc, rt_helper_class_2_jlc, 0, m_klass);
gen_save_ret(cs_jlc);
stackFix = gen_stack_to_args(true, cs_mon, 0);
//gen_call_vm(cs_mon, rt_helper_monitor_enter_static, 0, m_klass);
}
else {
AR gr = gr0;
if (cs_mon.reg(0) != gr_x) {
if (cs_mon.size() != 0) {
assert(cs_mon.caller_pops());
alu(alu_sub, sp, cs_mon.size());
}
ld(jobj, cs_mon.reg(0), m_base, voff(m_stack.thiz()));
}
else {
assert(cs_mon.size() != 0);
alu(alu_sub, sp, cs_mon.size());
ld(jobj, gr, m_base, voff(m_stack.thiz()));
st(jobj, gr, sp, cs_mon.off(0));
}
//gen_call_vm(cs_mon, rt_helper_monitor_enter, 1);
}
gen_call_vm(cs_mon, rt_helper_monitor_enter, 1);
if (method_is_static(m_method)) {
runlock(cs_mon);
if (stackFix != 0) {
alu(alu_sub, sp, stackFix);
}
}
if (is_set(DBG_TRACE_CG)) { dbg(";;>~monitor_enter\n"); }
}
if (is_set(DBG_TRACE_CG)) {
dbg_dump_state("after prolog", m_bbstate);
}
}
void Compiler::gen_return(const CallSig& cs)
{
jtype retType = cs.ret_jt();
if (is_set(DBG_TRACE_EE)) {
gen_dbg_rt(true, "exiting : %s", meth_fname());
}
if (m_infoBlock.get_bc_size() == 1 && m_bc[0] == OPCODE_RETURN && !g_jvmtiMode) {
// empty method, nothing to do; the same is in gen_prolog();
// TODO: need to check and make sure whether it's absolutely legal
// to bypass monitors on such an empty method
// FIXME: this optimization bypasses JVMTI notifications
ret(m_ci.caller_pops() ? 0 : m_ci.size());
if (retType != jvoid) {
vpop();
}
return;
}
bool is_sync = meth_is_sync();
if (is_sync) {
unsigned stackFix = 0;
if (is_set(DBG_TRACE_CG)) {
dbg(";;>monitor_exit\n");
}
if (meth_is_static()) {
gen_call_vm(cs_jlc, rt_helper_class_2_jlc, 0, m_klass);
gen_save_ret(cs_jlc);
stackFix = gen_stack_to_args(true, cs_mon, 0);
//gen_call_vm(cs_mon, rt_helper_monitor_exit_static, 0, m_klass);
} else {
AR gr = valloc(jobj);
if (cs_mon.reg(0) != gr_x) {
if (cs_mon.size() != 0) {
assert(cs_mon.caller_pops());
alu(alu_sub, sp, cs_mon.size());
}
vpark(cs_mon.reg(0));
ld(jobj, cs_mon.reg(0), m_base, voff(m_stack.thiz()));
}
else {
assert(cs_mon.size() != 0);
alu(alu_sub, sp, cs_mon.size());
ld(jobj, gr, m_base, voff(m_stack.thiz()));
st(jobj, gr, sp, cs_mon.off(0));
}
//gen_call_vm(cs_mon, rt_helper_monitor_exit, 1);
}
gen_call_vm(cs_mon, rt_helper_monitor_exit, 1);
if (meth_is_static()) {
runlock(cs_mon);
if (stackFix != 0) {
alu(alu_sub, sp, stackFix);
}
}
if (is_set(DBG_TRACE_CG)) {
dbg(";;>~monitor_exit\n");
}
}
if (compilation_params.exe_notify_method_exit) {
// JVMTI helper takes pointer to return value and method handle
SYNC_FIRST(static const CallSig cs_ti_mexit(CCONV_HELPERS, jvoid, jobj, jobj));
// The call is a bit unusual, and is processed as follows:
// we load the address of the top of the operand stack into a
// temporary register, and then pass this value as the pointer to
// the return value. If the method returns void, we load the
// address of the top of the stack anyway.
Val retVal;
rlock(cs_ti_mexit);
Val retValPtr = Val(jobj, valloc(jobj));
rlock(retValPtr);
if (retType != jvoid) {
// Make sure the top item is in memory
vswap(0);
if (is_big(retType)) {
vswap(1);
}
const Val& s = vstack(0);
assert(s.is_mem());
lea(retValPtr.as_opnd(), s.as_opnd());
}
else {
Opnd stackTop(jobj, m_base, voff(m_stack.unused()));
lea(retValPtr.as_opnd(), stackTop);
}
runlock(retValPtr);
AR ar = valloc(iplatf);
Opnd flag_addr(iplatf, ar);
mov(flag_addr,Opnd(iplatf,(int_ptr)rt_method_exit_flag_address));
Opnd mem(i16, ar, 0);
alu(alu_cmp, mem, Opnd(0));
unsigned br_off = br(z, 0, 0, taken);
Val vmeth(jobj, m_method);
gen_args(cs_ti_mexit, 0, &vmeth, &retValPtr);
gen_call_vm(cs_ti_mexit, rt_helper_ti_method_exit, cs_ti_mexit.count());
runlock(cs_ti_mexit);
patch(br_off, ip());
}
AR out_reg = cs.ret_reg(0);
if (is_f(retType)) {
if (out_reg == fp0) {
// On IA-32, always swap to memory first, then load into the FPU
vswap(0);
ld(retType, out_reg, m_base, vstack_off(0));
} else {
// Make sure the item is not immediate
Val op = vstack(0, vis_imm(0));
if (!op.is_reg() || op.reg() != out_reg) {
Opnd ret(retType, out_reg);
mov(ret, op.as_opnd());
}
}
}
else if (is_big(retType)) {
#ifdef _IA32_
vswap(0);
vswap(1);
AR out_reg1 = cs.ret_reg(1);
ld4(out_reg, m_base, vstack_off(0));
ld4(out_reg1, m_base, vstack_off(1));
#else
assert(false && "Unexpected case - 'big' type on EM64T");
#endif
}
else if (retType != jvoid) {
Val& op = vstack(0);
if (!op.is_reg() || op.reg() != out_reg) {
Opnd ret(retType, out_reg);
mov(ret, op.as_opnd());
}
}
if (retType != jvoid && is_set(DBG_TRACE_EE)) {
//TODO: the same code is in gen_save_ret() - extract into a
// separate method ?
push_all();
AR gtmp = gr0;
Opnd op = vstack(0, true).as_opnd();
st(jtmov(retType), op.reg(), m_base, voff(m_stack.scratch()));
ld(jobj, gtmp, m_base, voff(m_stack.scratch()));
if (cs_trace_arg.reg(0) != gr_x) {
if (cs_trace_arg.size() != 0) {
assert(cs_trace_arg.caller_pops());
alu(alu_sub, sp, cs_trace_arg.size());
}
mov(cs_trace_arg.reg(0), gtmp);
}
else {
assert(cs_trace_arg.size() != 0);
alu(alu_sub, sp, cs_trace_arg.size());
st4(gtmp, sp, cs_trace_arg.off(0));
}
Encoder::gen_args(cs_trace_arg, gtmp, 1, 2, -1, retType);
movp(gtmp, (void*)&dbg_trace_arg);
call(gtmp, cs_trace_arg, is_set(DBG_CHECK_STACK));
pop_all();
}
unsigned frameSize = m_stack.size();
// Restore callee-save regs
for (unsigned i=0; i<ar_num; i++) {
AR ar = _ar(i);
if (ar==sp || !is_callee_save(ar) || !m_global_rusage.test(i)) {
continue;
}
jtype jt = is_f(ar) ? dbl64 : jobj;
// Here, always use sp-based addressing - bp frame may be destroyed
// already by restoring bp.
ld(jt, ar, sp, frameSize+m_stack.spill(ar));
}
alu(alu_add, sp, frameSize);
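// For conventions where the callee pops the incoming args, pass their
// size to ret() (presumably emitted as 'ret imm16' on IA-32/EM64T);
// otherwise a plain return is emitted and the caller releases the args.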
ret(m_ci.caller_pops() ? 0 : m_ci.size());
//m_jframe->clear_stack();
if (retType != jvoid) {
// free up registers
vpop();
}
}
void CodeGen::gen_invoke(JavaByteCodes opcod, Method_Handle meth, unsigned short cpIndex,
const ::std::vector<jtype> &args, jtype retType)
{
const unsigned slots = count_slots(args);
// the stack depth at which 'this' for the method being invoked is
// located (if applicable)
const unsigned thiz_depth = slots - 1;
const JInst& jinst = *m_curr_inst;
CallSig cs(CCONV_MANAGED, retType, args);
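// Free up every register the callee's calling convention uses for
// argument passing, so mimic-stack items do not get clobbered by the
// outgoing args.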
for (unsigned i=0; i<cs.count(); i++) {
AR ar = cs.reg(i);
if (ar == ar_x) continue;
vpark(ar);
}
unsigned stackFix = 0;
rlock(cs);
const bool is_static = opcod == OPCODE_INVOKESTATIC;
if (meth == NULL && !m_lazy_resolution) {
runlock(cs); // was just locked above - unlock
gen_call_throw(ci_helper_linkerr, rt_helper_throw_linking_exc, 0,
m_klass, jinst.op0, jinst.opcode);
stackFix = gen_stack_to_args(true, cs, 0); // pop out args
runlock(cs); // due to gen_stack_to_args()
gen_gc_stack(-1, true);
if (retType != jvoid) {
gen_save_ret(cs);
}
if (stackFix != 0) {
alu(alu_sub, sp, stackFix);
}
return;
}
if (meth == NULL) {
//lazy resolution mode: get method addr and call it.
assert(m_lazy_resolution);
AR gr_ret = ar_x;
//1. get method address
if (opcod == OPCODE_INVOKESTATIC || opcod == OPCODE_INVOKESPECIAL) {
SYNC_FIRST(static const CallSig cs_get_is_addr(CCONV_HELPERS, iplatf, iplatf, i32));
rlock(cs_get_is_addr);
if (!is_static)
{
Val &thiz = vstack(thiz_depth, false);
// For INVOKESPECIAL we use the indirect address provided by the
// VM. This means we do not read the vtable, hence no memory
// access, so we cannot rely on the HW null check and must do an
// explicit one. Not a big loss, as INVOKESPECIAL mostly comes
// right after NEW, which guarantees non-null.
// In lazy resolution mode we must do the manual check and provide
// the helper with a non-null reference.
gen_check_null(thiz, false);
}
char* helper = opcod == OPCODE_INVOKESTATIC ? rt_helper_get_invokestatic_addr_withresolve :
rt_helper_get_invokespecial_addr_withresolve;
vpark();
gen_call_vm(cs_get_is_addr, helper, 0, m_klass, cpIndex);
runlock(cs_get_is_addr);
gr_ret = cs_get_is_addr.ret_reg(0);
} else {
assert(opcod == OPCODE_INVOKEVIRTUAL || opcod == OPCODE_INVOKEINTERFACE);
SYNC_FIRST(static const CallSig cs_get_iv_addr(CCONV_HELPERS, iplatf, iplatf, i32, jobj));
rlock(cs_get_iv_addr);
Val &thiz = vstack(thiz_depth, false);
gen_check_null(thiz, false);
char * helper = opcod == OPCODE_INVOKEVIRTUAL ? rt_helper_get_invokevirtual_addr_withresolve :
rt_helper_get_invokeinterface_addr_withresolve;
// setup constant parameters first,
Val vclass(iplatf, m_klass);
Val vcpIdx(cpIndex);
vpark();
gen_args(cs_get_iv_addr, 0, &vclass, &vcpIdx, &thiz);
gen_call_vm(cs_get_iv_addr, helper, 3);
runlock(cs_get_iv_addr);
gr_ret = cs_get_iv_addr.ret_reg(0);
}
rlock(gr_ret); //WARN: call addr is in gr_ret -> lock it
//2. call java method
stackFix = gen_stack_to_args(true, cs, 0);
vpark();
gen_gc_stack(-1, true);
AR gr = valloc(iplatf);
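// The *_addr_withresolve helpers return an indirect address - a cell
// holding the method's entry point - so dereference it to get the
// actual call target.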
ld(jobj, gr, gr_ret); //load indirect addr
call(gr, cs, is_set(DBG_CHECK_STACK));
runlock(gr_ret);
}
else if (opcod == OPCODE_INVOKEINTERFACE) {
// if it's INVOKEINTERFACE, then first resolve it
Class_Handle klass = method_get_class(meth);
const CallSig cs_vtbl(CCONV_HELPERS, iplatf, jobj, jobj);
rlock(cs_vtbl);
Val &thiz = vstack(thiz_depth, true);
rlock(thiz);
gen_check_null(thiz, true);
// Prepare args for ldInterface helper
if (cs_vtbl.reg(0) == gr_x) {
assert(cs_vtbl.size() != 0);
alu(alu_sub, sp, cs_vtbl.size());
st(jobj, thiz.reg(), sp, cs_vtbl.off(0));
}
else {
if (cs_vtbl.size() != 0) {
assert(cs_vtbl.caller_pops());
alu(alu_sub, sp, cs_vtbl.size());
}
mov(cs_vtbl.get(0), thiz.as_opnd());
}
runlock(thiz);
gen_call_vm(cs_vtbl, rt_helper_get_vtable, 1, klass);
AR gr_ret = cs_vtbl.ret_reg(0);
runlock(cs_vtbl);
//
// Method's vtable is in gr_ret now, prepare stack
//
rlock(gr_ret);
//st(jobj, gr_ret, m_base, voff(m_stack.scratch()));
stackFix = gen_stack_to_args(true, cs, 0);
vpark();
gen_gc_stack(-1, true);
unsigned offset = method_get_vtable_offset(meth);
//ld(jobj, gr_ret, m_base, voff(m_stack.scratch()));
runlock(gr_ret);
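// gr_ret now holds the resolved vtable; fetch the method's entry point
// from its slot at 'offset' and call through it.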
ld(jobj, gr_ret, gr_ret, offset);
call(gr_ret, cs, is_set(DBG_CHECK_STACK));
}
else if (opcod == OPCODE_INVOKEVIRTUAL) {
Val &thiz = vstack(thiz_depth, true);
rlock(thiz);
stackFix = gen_stack_to_args(true, cs, 0);
vpark();
gen_gc_stack(-1, true);
// Check for null here - we just spilled all the args and
// parked all the registers, so we have a chance to use HW NPE
gen_check_null(thiz, true);
AR gr = valloc(jobj);
size_t offset = method_get_vtable_offset(meth);
Opnd ptr;
if (g_vtbl_squeeze) {
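// Compressed vtable pointers: the object header keeps a 32-bit offset
// of the vtable relative to VTBL_BASE. Adding it to VTBL_BASE plus the
// method's slot offset yields the address of the vtable entry, which is
// then called through.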
ld4(gr, thiz.reg(), rt_vtable_offset);
AR gr_vtbase = valloc(jobj);
movp(gr_vtbase, (char*)VTBL_BASE+offset);
alu(jobj, alu_add, gr, gr_vtbase);
ptr = Opnd(jobj, gr, 0);
}
else {
ld(jobj, gr, thiz.reg(), rt_vtable_offset);
ptr = Opnd(jobj, gr, (int)offset);
}
call(ptr, cs, is_set(DBG_CHECK_STACK));
runlock(thiz);
}
else {
Val *thiz = NULL;
if (!is_static)
thiz = &vstack(thiz_depth, true);
stackFix = gen_stack_to_args(true, cs, 0);
vpark();
gen_gc_stack(-1, true);
if (!is_static)
// For INVOKESPECIAL we use the indirect address provided by the
// VM. This means we do not read the vtable, hence no memory
// access, so we cannot rely on the HW null check and must do an
// explicit one. Not a big loss, as INVOKESPECIAL mostly comes
// right after NEW, which guarantees non-null.
gen_check_null(*thiz, false);
void * paddr = method_get_indirect_address(meth);
#ifdef _IA32_
Opnd ptr(jobj, ar_x, paddr);
#else
AR gr = valloc(jobj);
movp(gr, paddr);
ld(jobj, gr, gr);
Opnd ptr(jobj, gr);
#endif
call(ptr, cs, is_set(DBG_CHECK_STACK));
}
// to unlock after gen_stack_to_args()
runlock(cs);
// to unlock after explicit lock at the top of this method
runlock(cs);
if (retType != jvoid) {
gen_save_ret(cs);
}
if (stackFix != 0) {
alu(alu_sub, sp, stackFix);
}
}
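/**
 * @brief Copies up to 7 explicitly provided values into argument
 *        positions [idx..) of the given CallSig.
 *
 * When idx == 0, the outgoing stack area (cs.size() bytes) is reserved
 * first. Processing stops at the first NULL argument pointer. Typical
 * usage (taken from gen_return() above):
 * @code
 *     Val vmeth(jobj, m_method);
 *     gen_args(cs_ti_mexit, 0, &vmeth, &retValPtr);
 * @endcode
 */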
void CodeGen::gen_args(const CallSig& cs, unsigned idx, const Val * parg0,
const Val * parg1, const Val * parg2, const Val * parg3,
const Val * parg4, const Val * parg5, const Val * parg6)
{
if (idx == 0 && cs.size() != 0) {
alu(alu_sub, sp, cs.size());
}
const Val* args[] = {parg0, parg1, parg2, parg3, parg4, parg5, parg6};
unsigned steps = min((int)COUNTOF(args), (int)cs.count()-(int)idx);
// 1st, lock'em all
for (unsigned i=0; i<steps; i++) {
if (args[i] == 0) {
break;
}
rlock(*args[i]);
}
// 2nd, generate moves
for (unsigned i=0; i<steps; i++) {
if (args[i] == 0) {
break;
}
unsigned id = idx + i;
Opnd arg = cs.get(id);
do_mov(arg, *args[i]);
// 3rd, unlock when no longer needed
runlock(*args[i]);
}
}
void CodeGen::gen_save_ret(const CallSig& cs)
{
jtype jt = cs.ret_jt();
assert(jt != jvoid);
AR ar = cs.ret_reg(0);
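// Sub-int return values arrive in the low bits of the return register;
// widen them to i32 (sign-extend i8/i16, zero-extend u16) before they
// are pushed onto the mimic stack.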
if (jt==i8) {
sx1(Opnd(i32, ar), Opnd(jt,ar));
jt = i32;
}
else if (jt == i16) {
sx2(Opnd(i32, ar), Opnd(jt,ar));
jt = i32;
}
else if (jt == u16) {
zx2(Opnd(i32, ar), Opnd(jt,ar));
jt = i32;
}
#ifdef _IA32_
if(ar == fp0) {
// Can't use vstack_off right here, as the item is not yet pushed.
unsigned slot = m_jframe->size();
if (is_wide(jt)) {
slot += 1;
}
vpush(Val(jt, m_base, voff(m_stack.stack_slot(slot))));
//
st(jt, fp0, m_base, vstack_off(0));
}
else if (is_big(jt)) {
assert(jt==i64);
AR ar1 = cs.ret_reg(1);
vpush2(Val(jt, ar), Val(jt, ar1));
}
else
#endif
{
assert(!is_big(jt));
vpush(Val(jt, ar));
}
if (is_set(DBG_TRACE_EE) && !is_f(jt) && !is_big(jt)) {
push_all(true);
assert(!is_callee_save(gr0));
AR gtmp = gr0;
//ld(jobj, gtmp, bp, m_stack.stack_slot(m_jframe->depth2slot(0)));
Opnd tmp(jt, gtmp);
mov(tmp, Opnd(jt, ar));
if (cs_trace_arg.reg(0) != gr_x) {
if (cs_trace_arg.size() != 0) {
alu(alu_sub, sp, cs_trace_arg.size());
}
mov(cs_trace_arg.reg(0), gtmp);
}
else {
assert(cs_trace_arg.size() != 0);
alu(alu_sub, sp, cs_trace_arg.size());
mov(Opnd(jt, sp, cs_trace_arg.off(0)), tmp);
}
Encoder::gen_args(cs_trace_arg, gtmp, 1, 2, -1, jt);
movp(gtmp, (void*)&dbg_trace_arg);
call(gtmp, cs_trace_arg, is_set(DBG_CHECK_STACK));
pop_all();
}
}
}}; // ~namespace Jitrino::Jet