lib/luajit/src/lj_target.h - trafficserver - Git at Google

 /*
 ** Definitions for target CPU.
 ** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
 */

 #ifndef _LJ_TARGET_H
 #define _LJ_TARGET_H

 #include "lj_def.h"
 #include "lj_arch.h"

 /* -- Registers and spill slots ------------------------------------------- */

 /* Register type (uint8_t in ir->r). */
 typedef uint32_t Reg;

 /* The hi-bit is NOT set for an allocated register. This means the value
 ** can be directly used without masking. The hi-bit is set for a register
 ** allocation hint or for RID_INIT, RID_SINK or RID_SUNK.
 */
 #define RID_NONE		0x80
 #define RID_MASK		0x7f
 #define RID_INIT		(RID_NONE|RID_MASK)
 #define RID_SINK		(RID_INIT-1)
 #define RID_SUNK		(RID_INIT-2)

 #define ra_noreg(r)		((r) & RID_NONE)
 #define ra_hasreg(r)		(!((r) & RID_NONE))

 /* The ra_hashint() macro assumes a previous test for ra_noreg(). */
 #define ra_hashint(r)		((r) < RID_SUNK)
 #define ra_gethint(r)		((Reg)((r) & RID_MASK))
 #define ra_sethint(rr, r)	rr = (uint8_t)((r)|RID_NONE)
 #define ra_samehint(r1, r2)	(ra_gethint((r1)^(r2)) == 0)

 /* Spill slot 0 means no spill slot has been allocated. */
 #define SPS_NONE		0

 #define ra_hasspill(s)		((s) != SPS_NONE)

 /* Combined register and spill slot (uint16_t in ir->prev). */
 typedef uint32_t RegSP;

 #define REGSP(r, s)		((r) + ((s) << 8))
 #define REGSP_HINT(r)		((r)|RID_NONE)
 #define REGSP_INIT		REGSP(RID_INIT, 0)

 #define regsp_reg(rs)		((rs) & 255)
 #define regsp_spill(rs)		((rs) >> 8)
 #define regsp_used(rs) \
   (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0))

 /* -- Register sets ------------------------------------------------------- */

 /* Bitset for registers. 32 registers suffice for most architectures.
 ** Note that one set holds bits for both GPRs and FPRs.
 */
 #if LJ_TARGET_PPC || LJ_TARGET_MIPS
 typedef uint64_t RegSet;
 #else
 typedef uint32_t RegSet;
 #endif

 #define RID2RSET(r)		(((RegSet)1) << (r))
 #define RSET_EMPTY		((RegSet)0)
 #define RSET_RANGE(lo, hi)	((RID2RSET((hi)-(lo))-1) << (lo))

 #define rset_test(rs, r)	((int)((rs) >> (r)) & 1)
 #define rset_set(rs, r)		(rs |= RID2RSET(r))
 #define rset_clear(rs, r)	(rs &= ~RID2RSET(r))
 #define rset_exclude(rs, r)	(rs & ~RID2RSET(r))
 #if LJ_TARGET_PPC || LJ_TARGET_MIPS
 #define rset_picktop(rs)	((Reg)(__builtin_clzll(rs)^63))
 #define rset_pickbot(rs)	((Reg)__builtin_ctzll(rs))
 #else
 #define rset_picktop(rs)	((Reg)lj_fls(rs))
 #define rset_pickbot(rs)	((Reg)lj_ffs(rs))
 #endif

 /* -- Register allocation cost -------------------------------------------- */

 /* The register allocation heuristic keeps track of the cost for allocating
 ** a specific register:
 **
 ** A free register (obviously) has a cost of 0 and a 1-bit in the free mask.
 **
 ** An already allocated register has the (non-zero) IR reference in the lowest
 ** bits and the result of a blended cost-model in the higher bits.
 **
 ** The allocator first checks the free mask for a hit. Otherwise an (unrolled)
 ** linear search for the minimum cost is used. The search doesn't need to
 ** keep track of the position of the minimum, which makes it very fast.
 ** The lowest bits of the minimum cost show the desired IR reference whose
 ** register is the one to evict.
 **
 ** Without the cost-model this degenerates to the standard heuristics for
 ** (reverse) linear-scan register allocation. Since code generation is done
 ** in reverse, a live interval extends from the last use to the first def.
 ** For an SSA IR the IR reference is the first (and only) def and thus
 ** trivially marks the end of the interval. The LSRA heuristics says to pick
 ** the register whose live interval has the furthest extent, i.e. the lowest
 ** IR reference in our case.
 **
 ** A cost-model should take into account other factors, like spill-cost and
 ** restore- or rematerialization-cost, which depend on the kind of instruction.
 ** E.g. constants have zero spill costs, variant instructions have higher
 ** costs than invariants and PHIs should preferably never be spilled.
 **
 ** Here's a first cut at simple, but effective blended cost-model for R-LSRA:
 ** - Due to careful design of the IR, constants already have lower IR
 **   references than invariants and invariants have lower IR references
 **   than variants.
 ** - The cost in the upper 16 bits is the sum of the IR reference and a
 **   weighted score. The score currently only takes into account whether
 **   the IRT_ISPHI bit is set in the instruction type.
 ** - The PHI weight is the minimum distance (in IR instructions) a PHI
 **   reference has to be further apart from a non-PHI reference to be spilled.
 ** - It should be a power of two (for speed) and must be between 2 and 32768.
 **   Good values for the PHI weight seem to be between 40 and 150.
 ** - Further study is required.
 */
 #define REGCOST_PHI_WEIGHT	64

 /* Cost for allocating a specific register. */
 typedef uint32_t RegCost;

 /* Note: assumes 16 bit IRRef1. */
 #define REGCOST(cost, ref)	((RegCost)(ref) + ((RegCost)(cost) << 16))
 #define regcost_ref(rc)		((IRRef1)(rc))

 #define REGCOST_T(t) \
   ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI))
 #define REGCOST_REF_T(ref, t)	(REGCOST((ref), (ref)) + REGCOST_T((t)))

 /* -- Target-specific definitions ----------------------------------------- */

 #if LJ_TARGET_X86ORX64
 #include "lj_target_x86.h"
 #elif LJ_TARGET_ARM
 #include "lj_target_arm.h"
 #elif LJ_TARGET_PPC
 #include "lj_target_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "lj_target_mips.h"
 #else
 #error "Missing include for target CPU"
 #endif

 #ifdef EXITSTUBS_PER_GROUP
 /* Return the address of an exit stub. */
 static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno)
 {
   lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL);
   return (char *)group[exitno / EXITSTUBS_PER_GROUP] +
 	 EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
 }
 /* Avoid dependence on lj_jit.h if only including lj_target.h. */
 #define exitstub_addr(J, exitno) \
   ((MCode *)exitstub_addr_((char **)((J)->exitstubgroup), (exitno)))
 #endif

 #endif
	/*
	** Definitions for target CPU.
	** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h
	*/

	#ifndef _LJ_TARGET_H
	#define _LJ_TARGET_H

	#include "lj_def.h"
	#include "lj_arch.h"

	/* -- Registers and spill slots ------------------------------------------- */

	/* Register type (uint8_t in ir->r). */
	typedef uint32_t Reg;

	/* The hi-bit is NOT set for an allocated register. This means the value
	** can be directly used without masking. The hi-bit is set for a register
	** allocation hint or for RID_INIT, RID_SINK or RID_SUNK.
	*/
	#define RID_NONE 0x80
	#define RID_MASK 0x7f
	#define RID_INIT (RID_NONE\|RID_MASK)
	#define RID_SINK (RID_INIT-1)
	#define RID_SUNK (RID_INIT-2)

	#define ra_noreg(r) ((r) & RID_NONE)
	#define ra_hasreg(r) (!((r) & RID_NONE))

	/* The ra_hashint() macro assumes a previous test for ra_noreg(). */
	#define ra_hashint(r) ((r) < RID_SUNK)
	#define ra_gethint(r) ((Reg)((r) & RID_MASK))
	#define ra_sethint(rr, r) rr = (uint8_t)((r)\|RID_NONE)
	#define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0)

	/* Spill slot 0 means no spill slot has been allocated. */
	#define SPS_NONE 0

	#define ra_hasspill(s) ((s) != SPS_NONE)

	/* Combined register and spill slot (uint16_t in ir->prev). */
	typedef uint32_t RegSP;

	#define REGSP(r, s) ((r) + ((s) << 8))
	#define REGSP_HINT(r) ((r)\|RID_NONE)
	#define REGSP_INIT REGSP(RID_INIT, 0)

	#define regsp_reg(rs) ((rs) & 255)
	#define regsp_spill(rs) ((rs) >> 8)
	#define regsp_used(rs) \
	(((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0))

	/* -- Register sets ------------------------------------------------------- */

	/* Bitset for registers. 32 registers suffice for most architectures.
	** Note that one set holds bits for both GPRs and FPRs.
	*/
	#if LJ_TARGET_PPC \|\| LJ_TARGET_MIPS
	typedef uint64_t RegSet;
	#else
	typedef uint32_t RegSet;
	#endif

	#define RID2RSET(r) (((RegSet)1) << (r))
	#define RSET_EMPTY ((RegSet)0)
	#define RSET_RANGE(lo, hi) ((RID2RSET((hi)-(lo))-1) << (lo))

	#define rset_test(rs, r) ((int)((rs) >> (r)) & 1)
	#define rset_set(rs, r) (rs \|= RID2RSET(r))
	#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
	#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
	#if LJ_TARGET_PPC \|\| LJ_TARGET_MIPS
	#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63))
	#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs))
	#else
	#define rset_picktop(rs) ((Reg)lj_fls(rs))
	#define rset_pickbot(rs) ((Reg)lj_ffs(rs))
	#endif

	/* -- Register allocation cost -------------------------------------------- */

	/* The register allocation heuristic keeps track of the cost for allocating
	** a specific register:
	**
	** A free register (obviously) has a cost of 0 and a 1-bit in the free mask.
	**
	** An already allocated register has the (non-zero) IR reference in the lowest
	** bits and the result of a blended cost-model in the higher bits.
	**
	** The allocator first checks the free mask for a hit. Otherwise an (unrolled)
	** linear search for the minimum cost is used. The search doesn't need to
	** keep track of the position of the minimum, which makes it very fast.
	** The lowest bits of the minimum cost show the desired IR reference whose
	** register is the one to evict.
	**
	** Without the cost-model this degenerates to the standard heuristics for
	** (reverse) linear-scan register allocation. Since code generation is done
	** in reverse, a live interval extends from the last use to the first def.
	** For an SSA IR the IR reference is the first (and only) def and thus
	** trivially marks the end of the interval. The LSRA heuristics says to pick
	** the register whose live interval has the furthest extent, i.e. the lowest
	** IR reference in our case.
	**
	** A cost-model should take into account other factors, like spill-cost and
	** restore- or rematerialization-cost, which depend on the kind of instruction.
	** E.g. constants have zero spill costs, variant instructions have higher
	** costs than invariants and PHIs should preferably never be spilled.
	**
	** Here's a first cut at simple, but effective blended cost-model for R-LSRA:
	** - Due to careful design of the IR, constants already have lower IR
	** references than invariants and invariants have lower IR references
	** than variants.
	** - The cost in the upper 16 bits is the sum of the IR reference and a
	** weighted score. The score currently only takes into account whether
	** the IRT_ISPHI bit is set in the instruction type.
	** - The PHI weight is the minimum distance (in IR instructions) a PHI
	** reference has to be further apart from a non-PHI reference to be spilled.
	** - It should be a power of two (for speed) and must be between 2 and 32768.
	** Good values for the PHI weight seem to be between 40 and 150.
	** - Further study is required.
	*/
	#define REGCOST_PHI_WEIGHT 64

	/* Cost for allocating a specific register. */
	typedef uint32_t RegCost;

	/* Note: assumes 16 bit IRRef1. */
	#define REGCOST(cost, ref) ((RegCost)(ref) + ((RegCost)(cost) << 16))
	#define regcost_ref(rc) ((IRRef1)(rc))

	#define REGCOST_T(t) \
	((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI))
	#define REGCOST_REF_T(ref, t) (REGCOST((ref), (ref)) + REGCOST_T((t)))

	/* -- Target-specific definitions ----------------------------------------- */

	#if LJ_TARGET_X86ORX64
	#include "lj_target_x86.h"
	#elif LJ_TARGET_ARM
	#include "lj_target_arm.h"
	#elif LJ_TARGET_PPC
	#include "lj_target_ppc.h"
	#elif LJ_TARGET_MIPS
	#include "lj_target_mips.h"
	#else
	#error "Missing include for target CPU"
	#endif

	#ifdef EXITSTUBS_PER_GROUP
	/* Return the address of an exit stub. */
	static LJ_AINLINE char exitstub_addr_(char *group, uint32_t exitno)
	{
	lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL);
	return (char *)group[exitno / EXITSTUBS_PER_GROUP] +
	EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
	}
	/* Avoid dependence on lj_jit.h if only including lj_target.h. */
	#define exitstub_addr(J, exitno) \
	((MCode )exitstub_addr_((char *)((J)->exitstubgroup), (exitno)))
	#endif

	#endif