|
From: <sv...@va...> - 2015-03-26 07:18:45
|
Author: sewardj
Date: Thu Mar 26 07:18:32 2015
New Revision: 3107
Log:
Bug 345215 - Performance improvements for the register allocator
The basic idea is to change the representation of registers (HReg) so
as to give Real registers a unique integer index starting from 0, with
the registers available for allocation numbered consecutively from zero
upwards. This allows the register allocator to index into its primary
data structure -- a table tracking the status of each available
register -- using a normal array index instead of having to search
sequentially through the table, as now.
It also allows an efficient bitmap-based representation for "set of
Real registers", which is important for the NCODE work.
There are various other perf improvements, most notably in calling
getRegUsage once rather than twice per instruction.
Cost of register allocation is reduced to around 65% ish of what it
previously was. This translates into speedups ranging from close to
zero for compute-intensive code up to around 7% for JIT-intensive
situations, eg "time perl tests/vg_regtest memcheck/tests/amd64".
Modified:
trunk/priv/host_amd64_defs.c
trunk/priv/host_amd64_defs.h
trunk/priv/host_amd64_isel.c
trunk/priv/host_arm64_defs.c
trunk/priv/host_arm64_defs.h
trunk/priv/host_arm64_isel.c
trunk/priv/host_arm_defs.c
trunk/priv/host_arm_defs.h
trunk/priv/host_arm_isel.c
trunk/priv/host_generic_reg_alloc2.c
trunk/priv/host_generic_regs.c
trunk/priv/host_generic_regs.h
trunk/priv/host_mips_defs.c
trunk/priv/host_mips_defs.h
trunk/priv/host_mips_isel.c
trunk/priv/host_ppc_defs.c
trunk/priv/host_ppc_defs.h
trunk/priv/host_ppc_isel.c
trunk/priv/host_s390_defs.c
trunk/priv/host_s390_defs.h
trunk/priv/host_s390_isel.c
trunk/priv/host_x86_defs.c
trunk/priv/host_x86_defs.h
trunk/priv/host_x86_isel.c
trunk/priv/main_main.c
Modified: trunk/priv/host_amd64_defs.c
==============================================================================
--- trunk/priv/host_amd64_defs.c (original)
+++ trunk/priv/host_amd64_defs.c Thu Mar 26 07:18:32 2015
@@ -44,6 +44,63 @@
/* --------- Registers. --------- */
+const RRegUniverse* getRRegUniverse_AMD64 ( void )
+{
+ /* The real-register universe is a big constant, so we just want to
+ initialise it once. */
+ static RRegUniverse rRegUniverse_AMD64;
+ static Bool rRegUniverse_AMD64_initted = False;
+
+ /* Handy shorthand, nothing more */
+ RRegUniverse* ru = &rRegUniverse_AMD64;
+
+ /* This isn't thread-safe. Sigh. */
+ if (LIKELY(rRegUniverse_AMD64_initted))
+ return ru;
+
+ RRegUniverse__init(ru);
+
+ /* Add the registers. The initial segment of this array must be
+ those available for allocation by reg-alloc, and those that
+ follow are not available for allocation. */
+ ru->regs[ru->size++] = hregAMD64_RSI();
+ ru->regs[ru->size++] = hregAMD64_RDI();
+ ru->regs[ru->size++] = hregAMD64_R8();
+ ru->regs[ru->size++] = hregAMD64_R9();
+ ru->regs[ru->size++] = hregAMD64_R12();
+ ru->regs[ru->size++] = hregAMD64_R13();
+ ru->regs[ru->size++] = hregAMD64_R14();
+ ru->regs[ru->size++] = hregAMD64_R15();
+ ru->regs[ru->size++] = hregAMD64_RBX();
+ ru->regs[ru->size++] = hregAMD64_XMM3();
+ ru->regs[ru->size++] = hregAMD64_XMM4();
+ ru->regs[ru->size++] = hregAMD64_XMM5();
+ ru->regs[ru->size++] = hregAMD64_XMM6();
+ ru->regs[ru->size++] = hregAMD64_XMM7();
+ ru->regs[ru->size++] = hregAMD64_XMM8();
+ ru->regs[ru->size++] = hregAMD64_XMM9();
+ ru->regs[ru->size++] = hregAMD64_XMM10();
+ ru->regs[ru->size++] = hregAMD64_XMM11();
+ ru->regs[ru->size++] = hregAMD64_XMM12();
+ ru->regs[ru->size++] = hregAMD64_R10();
+ ru->allocable = ru->size;
+ /* And other regs, not available to the allocator. */
+ ru->regs[ru->size++] = hregAMD64_RAX();
+ ru->regs[ru->size++] = hregAMD64_RCX();
+ ru->regs[ru->size++] = hregAMD64_RDX();
+ ru->regs[ru->size++] = hregAMD64_RSP();
+ ru->regs[ru->size++] = hregAMD64_RBP();
+ ru->regs[ru->size++] = hregAMD64_R11();
+ ru->regs[ru->size++] = hregAMD64_XMM0();
+ ru->regs[ru->size++] = hregAMD64_XMM1();
+
+ rRegUniverse_AMD64_initted = True;
+
+ RRegUniverse__check_is_sane(ru);
+ return ru;
+}
+
+
void ppHRegAMD64 ( HReg reg )
{
Int r;
@@ -58,17 +115,12 @@
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
- r = hregNumber(reg);
+ r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
vex_printf("%s", ireg64_names[r]);
return;
- case HRcFlt64:
- r = hregNumber(reg);
- vassert(r >= 0 && r < 6);
- vex_printf("%%fake%d", r);
- return;
case HRcVec128:
- r = hregNumber(reg);
+ r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
vex_printf("%%xmm%d", r);
return;
@@ -81,8 +133,8 @@
{
Int r;
static const HChar* ireg32_names[16]
- = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
- "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
+ = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+ "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
ppHReg(reg);
@@ -92,7 +144,7 @@
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
- r = hregNumber(reg);
+ r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
vex_printf("%s", ireg32_names[r]);
return;
@@ -101,77 +153,6 @@
}
}
-HReg hregAMD64_RAX ( void ) { return mkHReg( 0, HRcInt64, False); }
-HReg hregAMD64_RCX ( void ) { return mkHReg( 1, HRcInt64, False); }
-HReg hregAMD64_RDX ( void ) { return mkHReg( 2, HRcInt64, False); }
-HReg hregAMD64_RBX ( void ) { return mkHReg( 3, HRcInt64, False); }
-HReg hregAMD64_RSP ( void ) { return mkHReg( 4, HRcInt64, False); }
-HReg hregAMD64_RBP ( void ) { return mkHReg( 5, HRcInt64, False); }
-HReg hregAMD64_RSI ( void ) { return mkHReg( 6, HRcInt64, False); }
-HReg hregAMD64_RDI ( void ) { return mkHReg( 7, HRcInt64, False); }
-HReg hregAMD64_R8 ( void ) { return mkHReg( 8, HRcInt64, False); }
-HReg hregAMD64_R9 ( void ) { return mkHReg( 9, HRcInt64, False); }
-HReg hregAMD64_R10 ( void ) { return mkHReg(10, HRcInt64, False); }
-HReg hregAMD64_R11 ( void ) { return mkHReg(11, HRcInt64, False); }
-HReg hregAMD64_R12 ( void ) { return mkHReg(12, HRcInt64, False); }
-HReg hregAMD64_R13 ( void ) { return mkHReg(13, HRcInt64, False); }
-HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
-HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }
-
-HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); }
-HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); }
-HReg hregAMD64_XMM3 ( void ) { return mkHReg( 3, HRcVec128, False); }
-HReg hregAMD64_XMM4 ( void ) { return mkHReg( 4, HRcVec128, False); }
-HReg hregAMD64_XMM5 ( void ) { return mkHReg( 5, HRcVec128, False); }
-HReg hregAMD64_XMM6 ( void ) { return mkHReg( 6, HRcVec128, False); }
-HReg hregAMD64_XMM7 ( void ) { return mkHReg( 7, HRcVec128, False); }
-HReg hregAMD64_XMM8 ( void ) { return mkHReg( 8, HRcVec128, False); }
-HReg hregAMD64_XMM9 ( void ) { return mkHReg( 9, HRcVec128, False); }
-HReg hregAMD64_XMM10 ( void ) { return mkHReg(10, HRcVec128, False); }
-HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
-HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }
-
-
-void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
-{
-#if 0
- *nregs = 6;
- *arr = LibVEX_Alloc_inline(*nregs * sizeof(HReg));
- (*arr)[ 0] = hregAMD64_RSI();
- (*arr)[ 1] = hregAMD64_RDI();
- (*arr)[ 2] = hregAMD64_RBX();
-
- (*arr)[ 3] = hregAMD64_XMM7();
- (*arr)[ 4] = hregAMD64_XMM8();
- (*arr)[ 5] = hregAMD64_XMM9();
-#endif
-#if 1
- *nregs = 20;
- *arr = LibVEX_Alloc_inline(*nregs * sizeof(HReg));
- (*arr)[ 0] = hregAMD64_RSI();
- (*arr)[ 1] = hregAMD64_RDI();
- (*arr)[ 2] = hregAMD64_R8();
- (*arr)[ 3] = hregAMD64_R9();
- (*arr)[ 4] = hregAMD64_R12();
- (*arr)[ 5] = hregAMD64_R13();
- (*arr)[ 6] = hregAMD64_R14();
- (*arr)[ 7] = hregAMD64_R15();
- (*arr)[ 8] = hregAMD64_RBX();
-
- (*arr)[ 9] = hregAMD64_XMM3();
- (*arr)[10] = hregAMD64_XMM4();
- (*arr)[11] = hregAMD64_XMM5();
- (*arr)[12] = hregAMD64_XMM6();
- (*arr)[13] = hregAMD64_XMM7();
- (*arr)[14] = hregAMD64_XMM8();
- (*arr)[15] = hregAMD64_XMM9();
- (*arr)[16] = hregAMD64_XMM10();
- (*arr)[17] = hregAMD64_XMM11();
- (*arr)[18] = hregAMD64_XMM12();
- (*arr)[19] = hregAMD64_R10();
-#endif
-}
-
/* --------- Condition codes, Intel encoding. --------- */
@@ -1962,80 +1943,61 @@
/* --------- The amd64 assembler (bleh.) --------- */
/* Produce the low three bits of an integer register number. */
-static UChar iregBits210 ( HReg r )
+inline static UInt iregEnc210 ( HReg r )
{
UInt n;
vassert(hregClass(r) == HRcInt64);
vassert(!hregIsVirtual(r));
- n = hregNumber(r);
+ n = hregEncoding(r);
vassert(n <= 15);
- return toUChar(n & 7);
+ return n & 7;
}
/* Produce bit 3 of an integer register number. */
-static UChar iregBit3 ( HReg r )
+inline static UInt iregEnc3 ( HReg r )
{
UInt n;
vassert(hregClass(r) == HRcInt64);
vassert(!hregIsVirtual(r));
- n = hregNumber(r);
+ n = hregEncoding(r);
vassert(n <= 15);
- return toUChar((n >> 3) & 1);
+ return (n >> 3) & 1;
}
/* Produce a complete 4-bit integer register number. */
-static UChar iregBits3210 ( HReg r )
+inline static UInt iregEnc3210 ( HReg r )
{
UInt n;
vassert(hregClass(r) == HRcInt64);
vassert(!hregIsVirtual(r));
- n = hregNumber(r);
+ n = hregEncoding(r);
vassert(n <= 15);
- return toUChar(n);
+ return n;
}
-/* Given an xmm (128bit V-class) register number, produce the
- equivalent numbered register in 64-bit I-class. This is a bit of
- fakery which facilitates using functions that work on integer
- register numbers to be used when assembling SSE instructions
- too. */
-static HReg vreg2ireg ( HReg r )
+/* Produce a complete 4-bit integer register number. */
+inline static UInt vregEnc3210 ( HReg r )
{
UInt n;
vassert(hregClass(r) == HRcVec128);
vassert(!hregIsVirtual(r));
- n = hregNumber(r);
+ n = hregEncoding(r);
vassert(n <= 15);
- return mkHReg(n, HRcInt64, False);
+ return n;
}
-//uu /* Ditto for ymm regs. */
-//uu static HReg dvreg2ireg ( HReg r )
-//uu {
-//uu UInt n;
-//uu vassert(hregClass(r) == HRcVec256);
-//uu vassert(!hregIsVirtual(r));
-//uu n = hregNumber(r);
-//uu vassert(n <= 15);
-//uu return mkHReg(n, HRcInt64, False);
-//uu }
-
-static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
+inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
{
vassert(mod < 4);
vassert((reg|regmem) < 8);
- return toUChar( ((mod & 3) << 6)
- | ((reg & 7) << 3)
- | (regmem & 7) );
+ return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
}
-static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
+inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
{
vassert(shift < 4);
vassert((regindex|regbase) < 8);
- return toUChar( ((shift & 3) << 6)
- | ((regindex & 7) << 3)
- | (regbase & 7) );
+ return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
}
static UChar* emit32 ( UChar* p, UInt w32 )
@@ -2102,8 +2064,9 @@
| index != RSP
= 10 greg 100, scale index base, d32
*/
-static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
+static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
{
+ UInt gregEnc210 = gregEnc3210 & 7;
if (am->tag == Aam_IR) {
if (am->Aam.IR.imm == 0
&& ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
@@ -2111,39 +2074,36 @@
&& ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
&& ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
) {
- *p++ = mkModRegRM(0, iregBits210(greg),
- iregBits210(am->Aam.IR.reg));
+ *p++ = mkModRegRM(0, gregEnc210, iregEnc210(am->Aam.IR.reg));
return p;
}
if (fits8bits(am->Aam.IR.imm)
&& ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
&& ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
) {
- *p++ = mkModRegRM(1, iregBits210(greg),
- iregBits210(am->Aam.IR.reg));
+ *p++ = mkModRegRM(1, gregEnc210, iregEnc210(am->Aam.IR.reg));
*p++ = toUChar(am->Aam.IR.imm & 0xFF);
return p;
}
if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
&& ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
) {
- *p++ = mkModRegRM(2, iregBits210(greg),
- iregBits210(am->Aam.IR.reg));
+ *p++ = mkModRegRM(2, gregEnc210, iregEnc210(am->Aam.IR.reg));
p = emit32(p, am->Aam.IR.imm);
return p;
}
if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
|| sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
&& fits8bits(am->Aam.IR.imm)) {
- *p++ = mkModRegRM(1, iregBits210(greg), 4);
+ *p++ = mkModRegRM(1, gregEnc210, 4);
*p++ = 0x24;
*p++ = toUChar(am->Aam.IR.imm & 0xFF);
return p;
}
if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
- || wait for test case for RSP case */
+ || wait for test case for RSP case */
sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
- *p++ = mkModRegRM(2, iregBits210(greg), 4);
+ *p++ = mkModRegRM(2, gregEnc210, 4);
*p++ = 0x24;
p = emit32(p, am->Aam.IR.imm);
return p;
@@ -2155,16 +2115,16 @@
if (am->tag == Aam_IRRS) {
if (fits8bits(am->Aam.IRRS.imm)
&& ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
- *p++ = mkModRegRM(1, iregBits210(greg), 4);
- *p++ = mkSIB(am->Aam.IRRS.shift, iregBits210(am->Aam.IRRS.index),
- iregBits210(am->Aam.IRRS.base));
+ *p++ = mkModRegRM(1, gregEnc210, 4);
+ *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
+ iregEnc210(am->Aam.IRRS.base));
*p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
return p;
}
if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
- *p++ = mkModRegRM(2, iregBits210(greg), 4);
- *p++ = mkSIB(am->Aam.IRRS.shift, iregBits210(am->Aam.IRRS.index),
- iregBits210(am->Aam.IRRS.base));
+ *p++ = mkModRegRM(2, gregEnc210, 4);
+ *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
+ iregEnc210(am->Aam.IRRS.base));
p = emit32(p, am->Aam.IRRS.imm);
return p;
}
@@ -2176,52 +2136,122 @@
/*NOTREACHED*/
}
+static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
+{
+ return doAMode_M__wrk(p, iregEnc3210(greg), am);
+}
+
+static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
+{
+ vassert(gregEnc3210 < 16);
+ return doAMode_M__wrk(p, gregEnc3210, am);
+}
+
/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
-static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
+inline
+static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
{
- *p++ = mkModRegRM(3, iregBits210(greg), iregBits210(ereg));
+ *p++ = mkModRegRM(3, gregEnc3210 & 7, eregEnc3210 & 7);
return p;
}
+static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
+{
+ return doAMode_R__wrk(p, iregEnc3210(greg), iregEnc3210(ereg));
+}
+
+static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc3210, HReg ereg )
+{
+ vassert(gregEnc3210 < 16);
+ return doAMode_R__wrk(p, gregEnc3210, iregEnc3210(ereg));
+}
+
+static UChar* doAMode_R_reg_enc ( UChar* p, HReg greg, UInt eregEnc3210 )
+{
+ vassert(eregEnc3210 < 16);
+ return doAMode_R__wrk(p, iregEnc3210(greg), eregEnc3210);
+}
+
+static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
+{
+ vassert( (gregEnc3210|eregEnc3210) < 16);
+ return doAMode_R__wrk(p, gregEnc3210, eregEnc3210);
+}
+
/* Clear the W bit on a REX byte, thereby changing the operand size
back to whatever that instruction's default operand size is. */
static inline UChar clearWBit ( UChar rex )
{
- return toUChar(rex & ~(1<<3));
+ return rex & ~(1<<3);
}
/* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
-static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
+inline static UChar rexAMode_M__wrk ( UInt gregEnc3210, AMD64AMode* am )
{
if (am->tag == Aam_IR) {
UChar W = 1; /* we want 64-bit mode */
- UChar R = iregBit3(greg);
+ UChar R = (gregEnc3210 >> 3) & 1;
UChar X = 0; /* not relevant */
- UChar B = iregBit3(am->Aam.IR.reg);
- return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+ UChar B = iregEnc3(am->Aam.IR.reg);
+ return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
}
if (am->tag == Aam_IRRS) {
UChar W = 1; /* we want 64-bit mode */
- UChar R = iregBit3(greg);
- UChar X = iregBit3(am->Aam.IRRS.index);
- UChar B = iregBit3(am->Aam.IRRS.base);
- return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+ UChar R = (gregEnc3210 >> 3) & 1;
+ UChar X = iregEnc3(am->Aam.IRRS.index);
+ UChar B = iregEnc3(am->Aam.IRRS.base);
+ return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
}
vassert(0);
return 0; /*NOTREACHED*/
}
+static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
+{
+ return rexAMode_M__wrk(iregEnc3210(greg), am);
+}
+
+static UChar rexAMode_M_enc ( UInt gregEnc3210, AMD64AMode* am )
+{
+ vassert(gregEnc3210 < 16);
+ return rexAMode_M__wrk(gregEnc3210, am);
+}
+
+
/* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
-static UChar rexAMode_R ( HReg greg, HReg ereg )
+inline static UChar rexAMode_R__wrk ( UInt gregEnc3210, UInt eregEnc3210 )
{
UChar W = 1; /* we want 64-bit mode */
- UChar R = iregBit3(greg);
+ UChar R = (gregEnc3210 >> 3) & 1;
UChar X = 0; /* not relevant */
- UChar B = iregBit3(ereg);
- return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+ UChar B = (eregEnc3210 >> 3) & 1;
+ return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
+}
+
+static UChar rexAMode_R ( HReg greg, HReg ereg )
+{
+ return rexAMode_R__wrk(iregEnc3210(greg), iregEnc3210(ereg));
+}
+
+static UChar rexAMode_R_enc_reg ( UInt gregEnc3210, HReg ereg )
+{
+ vassert(gregEnc3210 < 16);
+ return rexAMode_R__wrk(gregEnc3210, iregEnc3210(ereg));
+}
+
+static UChar rexAMode_R_reg_enc ( HReg greg, UInt eregEnc3210 )
+{
+ vassert(eregEnc3210 < 16);
+ return rexAMode_R__wrk(iregEnc3210(greg), eregEnc3210);
+}
+
+static UChar rexAMode_R_enc_enc ( UInt gregEnc3210, UInt eregEnc3210 )
+{
+ vassert((gregEnc3210|eregEnc3210) < 16);
+ return rexAMode_R__wrk(gregEnc3210, eregEnc3210);
}
@@ -2271,14 +2301,14 @@
//uu UChar rexB = 0;
//uu /* Same logic as in rexAMode_M. */
//uu if (am->tag == Aam_IR) {
-//uu rexR = iregBit3(greg);
+//uu rexR = iregEnc3(greg);
//uu rexX = 0; /* not relevant */
-//uu rexB = iregBit3(am->Aam.IR.reg);
+//uu rexB = iregEnc3(am->Aam.IR.reg);
//uu }
//uu else if (am->tag == Aam_IRRS) {
-//uu rexR = iregBit3(greg);
-//uu rexX = iregBit3(am->Aam.IRRS.index);
-//uu rexB = iregBit3(am->Aam.IRRS.base);
+//uu rexR = iregEnc3(greg);
+//uu rexX = iregEnc3(am->Aam.IRRS.index);
+//uu rexB = iregEnc3(am->Aam.IRRS.base);
//uu } else {
//uu vassert(0);
//uu }
@@ -2339,11 +2369,6 @@
vassert(nbuf >= 32);
vassert(mode64 == True);
- /* Wrap an integer as a int register, for use assembling
- GrpN insns, in which the greg field is used as a sub-opcode
- and does not really contain a register. */
-# define fake(_n) mkHReg((_n), HRcInt64, False)
-
/* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
switch (i->tag) {
@@ -2354,13 +2379,13 @@
widening rule) for constants under 1 million. We could
use this form for the range 0 to 0x7FFFFFFF inclusive, but
limit it to a smaller range for verifiability purposes. */
- if (1 & iregBit3(i->Ain.Imm64.dst))
+ if (1 & iregEnc3(i->Ain.Imm64.dst))
*p++ = 0x41;
- *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst);
+ *p++ = 0xB8 + iregEnc210(i->Ain.Imm64.dst);
p = emit32(p, (UInt)i->Ain.Imm64.imm64);
} else {
- *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
- *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
+ *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Imm64.dst)));
+ *p++ = toUChar(0xB8 + iregEnc210(i->Ain.Imm64.dst));
p = emit64(p, i->Ain.Imm64.imm64);
}
goto done;
@@ -2380,14 +2405,14 @@
of the dst to be zeroed out too. This saves 1
and sometimes 2 bytes compared to the more
obvious encoding in the 'else' branch. */
- if (1 & iregBit3(i->Ain.Alu64R.dst))
+ if (1 & iregEnc3(i->Ain.Alu64R.dst))
*p++ = 0x41;
- *p++ = 0xB8 + iregBits210(i->Ain.Alu64R.dst);
+ *p++ = 0xB8 + iregEnc210(i->Ain.Alu64R.dst);
p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
} else {
- *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Alu64R.dst)));
+ *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Alu64R.dst)));
*p++ = 0xC7;
- *p++ = toUChar(0xC0 + iregBits210(i->Ain.Alu64R.dst));
+ *p++ = toUChar(0xC0 + iregEnc210(i->Ain.Alu64R.dst));
p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
}
goto done;
@@ -2475,14 +2500,14 @@
p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
} else
if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
- *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst );
+ *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst );
*p++ = 0x83;
- p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
+ p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
*p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
} else {
- *p++ = rexAMode_R( fake(0), i->Ain.Alu64R.dst);
+ *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst);
*p++ = 0x81;
- p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu64R.dst);
+ p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
}
goto done;
@@ -2517,9 +2542,9 @@
i->Ain.Alu64M.dst);
goto done;
case Ari_Imm:
- *p++ = rexAMode_M(fake(0), i->Ain.Alu64M.dst);
+ *p++ = rexAMode_M_enc(0, i->Ain.Alu64M.dst);
*p++ = 0xC7;
- p = doAMode_M(p, fake(0), i->Ain.Alu64M.dst);
+ p = doAMode_M_enc(p, 0, i->Ain.Alu64M.dst);
p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
goto done;
default:
@@ -2537,14 +2562,14 @@
default: goto bad;
}
if (i->Ain.Sh64.src == 0) {
- *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
+ *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
*p++ = toUChar(opc_cl);
- p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
+ p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
goto done;
} else {
- *p++ = rexAMode_R(fake(0), i->Ain.Sh64.dst);
+ *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
*p++ = toUChar(opc_imm);
- p = doAMode_R(p, fake(subopc), i->Ain.Sh64.dst);
+ p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
*p++ = (UChar)(i->Ain.Sh64.src);
goto done;
}
@@ -2552,23 +2577,23 @@
case Ain_Test64:
/* testq sign-extend($imm32), %reg */
- *p++ = rexAMode_R(fake(0), i->Ain.Test64.dst);
+ *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
*p++ = 0xF7;
- p = doAMode_R(p, fake(0), i->Ain.Test64.dst);
+ p = doAMode_R_enc_reg(p, 0, i->Ain.Test64.dst);
p = emit32(p, i->Ain.Test64.imm32);
goto done;
case Ain_Unary64:
if (i->Ain.Unary64.op == Aun_NOT) {
- *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
+ *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
*p++ = 0xF7;
- p = doAMode_R(p, fake(2), i->Ain.Unary64.dst);
+ p = doAMode_R_enc_reg(p, 2, i->Ain.Unary64.dst);
goto done;
}
if (i->Ain.Unary64.op == Aun_NEG) {
- *p++ = rexAMode_R(fake(0), i->Ain.Unary64.dst);
+ *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
*p++ = 0xF7;
- p = doAMode_R(p, fake(3), i->Ain.Unary64.dst);
+ p = doAMode_R_enc_reg(p, 3, i->Ain.Unary64.dst);
goto done;
}
break;
@@ -2606,16 +2631,16 @@
p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
} else
if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
- rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst ) );
+ rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst ) );
if (rex != 0x40) *p++ = rex;
*p++ = 0x83;
- p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
+ p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
*p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
} else {
- rex = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst) );
+ rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst) );
if (rex != 0x40) *p++ = rex;
*p++ = 0x81;
- p = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
+ p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
}
goto done;
@@ -2646,18 +2671,14 @@
subopc = i->Ain.MulL.syned ? 5 : 4;
switch (i->Ain.MulL.src->tag) {
case Arm_Mem:
- *p++ = rexAMode_M( fake(0),
- i->Ain.MulL.src->Arm.Mem.am);
+ *p++ = rexAMode_M_enc(0, i->Ain.MulL.src->Arm.Mem.am);
*p++ = 0xF7;
- p = doAMode_M(p, fake(subopc),
- i->Ain.MulL.src->Arm.Mem.am);
+ p = doAMode_M_enc(p, subopc, i->Ain.MulL.src->Arm.Mem.am);
goto done;
case Arm_Reg:
- *p++ = rexAMode_R(fake(0),
- i->Ain.MulL.src->Arm.Reg.reg);
+ *p++ = rexAMode_R_enc_reg(0, i->Ain.MulL.src->Arm.Reg.reg);
*p++ = 0xF7;
- p = doAMode_R(p, fake(subopc),
- i->Ain.MulL.src->Arm.Reg.reg);
+ p = doAMode_R_enc_reg(p, subopc, i->Ain.MulL.src->Arm.Reg.reg);
goto done;
default:
goto bad;
@@ -2672,15 +2693,13 @@
goto bad;
/*FIXME*/
*p++ = 0xF7;
- p = doAMode_M(p, fake(subopc),
- i->Ain.Div.src->Arm.Mem.am);
+ p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
goto done;
case Arm_Reg:
*p++ = clearWBit(
- rexAMode_R( fake(0), i->Ain.Div.src->Arm.Reg.reg));
+ rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg));
*p++ = 0xF7;
- p = doAMode_R(p, fake(subopc),
- i->Ain.Div.src->Arm.Reg.reg);
+ p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
goto done;
default:
goto bad;
@@ -2689,18 +2708,14 @@
if (i->Ain.Div.sz == 8) {
switch (i->Ain.Div.src->tag) {
case Arm_Mem:
- *p++ = rexAMode_M( fake(0),
- i->Ain.Div.src->Arm.Mem.am);
+ *p++ = rexAMode_M_enc(0, i->Ain.Div.src->Arm.Mem.am);
*p++ = 0xF7;
- p = doAMode_M(p, fake(subopc),
- i->Ain.Div.src->Arm.Mem.am);
+ p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
goto done;
case Arm_Reg:
- *p++ = rexAMode_R( fake(0),
- i->Ain.Div.src->Arm.Reg.reg);
+ *p++ = rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg);
*p++ = 0xF7;
- p = doAMode_R(p, fake(subopc),
- i->Ain.Div.src->Arm.Reg.reg);
+ p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
goto done;
default:
goto bad;
@@ -2712,17 +2727,17 @@
switch (i->Ain.Push.src->tag) {
case Armi_Mem:
*p++ = clearWBit(
- rexAMode_M(fake(0), i->Ain.Push.src->Armi.Mem.am));
+ rexAMode_M_enc(0, i->Ain.Push.src->Armi.Mem.am));
*p++ = 0xFF;
- p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
+ p = doAMode_M_enc(p, 6, i->Ain.Push.src->Armi.Mem.am);
goto done;
case Armi_Imm:
*p++ = 0x68;
p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
goto done;
case Armi_Reg:
- *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg)));
- *p++ = toUChar(0x50 + iregBits210(i->Ain.Push.src->Armi.Reg.reg));
+ *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.Push.src->Armi.Reg.reg)));
+ *p++ = toUChar(0x50 + iregEnc210(i->Ain.Push.src->Armi.Reg.reg));
goto done;
default:
goto bad;
@@ -3148,7 +3163,7 @@
of the destination should be forced to zero, but doing 'xorq
%r,%r' kills the flag(s) we are about to read. Sigh. So
start off my moving $0 into the dest. */
- reg = iregBits3210(i->Ain.Set64.dst);
+ reg = iregEnc3210(i->Ain.Set64.dst);
vassert(reg < 16);
/* movq $0, %dst */
@@ -3206,13 +3221,13 @@
/* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
aren't encoded in the insn. */
- rex = rexAMode_M( fake(1), i->Ain.ACAS.addr );
+ rex = rexAMode_M_enc(1, i->Ain.ACAS.addr );
if (i->Ain.ACAS.sz != 8)
rex = clearWBit(rex);
*p++ = rex;
*p++ = 0x0F;
*p++ = 0xC7;
- p = doAMode_M(p, fake(1), i->Ain.DACAS.addr);
+ p = doAMode_M_enc(p, 1, i->Ain.DACAS.addr);
goto done;
case Ain_A87Free:
@@ -3227,15 +3242,15 @@
if (i->Ain.A87PushPop.isPush) {
/* Load from memory into %st(0): flds/fldl amode */
*p++ = clearWBit(
- rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
+ rexAMode_M_enc(0, i->Ain.A87PushPop.addr) );
*p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
- p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
+ p = doAMode_M_enc(p, 0/*subopcode*/, i->Ain.A87PushPop.addr);
} else {
/* Dump %st(0) to memory: fstps/fstpl amode */
*p++ = clearWBit(
- rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
+ rexAMode_M_enc(3, i->Ain.A87PushPop.addr) );
*p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
- p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
+ p = doAMode_M_enc(p, 3/*subopcode*/, i->Ain.A87PushPop.addr);
goto done;
}
goto done;
@@ -3276,16 +3291,16 @@
case Ain_A87LdCW:
*p++ = clearWBit(
- rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
+ rexAMode_M_enc(5, i->Ain.A87LdCW.addr) );
*p++ = 0xD9;
- p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
+ p = doAMode_M_enc(p, 5/*subopcode*/, i->Ain.A87LdCW.addr);
goto done;
case Ain_A87StSW:
*p++ = clearWBit(
- rexAMode_M(fake(7), i->Ain.A87StSW.addr) );
+ rexAMode_M_enc(7, i->Ain.A87StSW.addr) );
*p++ = 0xDD;
- p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr);
+ p = doAMode_M_enc(p, 7/*subopcode*/, i->Ain.A87StSW.addr);
goto done;
case Ain_Store:
@@ -3318,10 +3333,10 @@
break;
case Ain_LdMXCSR:
- *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
+ *p++ = clearWBit(rexAMode_M_enc(0, i->Ain.LdMXCSR.addr));
*p++ = 0x0F;
*p++ = 0xAE;
- p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
+ p = doAMode_M_enc(p, 2/*subopcode*/, i->Ain.LdMXCSR.addr);
goto done;
case Ain_SseUComIS:
@@ -3334,53 +3349,53 @@
vassert(i->Ain.SseUComIS.sz == 4);
}
*p++ = clearWBit (
- rexAMode_R( vreg2ireg(i->Ain.SseUComIS.srcL),
- vreg2ireg(i->Ain.SseUComIS.srcR) ));
+ rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseUComIS.srcL),
+ vregEnc3210(i->Ain.SseUComIS.srcR) ));
*p++ = 0x0F;
*p++ = 0x2E;
- p = doAMode_R(p, vreg2ireg(i->Ain.SseUComIS.srcL),
- vreg2ireg(i->Ain.SseUComIS.srcR) );
+ p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseUComIS.srcL),
+ vregEnc3210(i->Ain.SseUComIS.srcR) );
/* pushfq */
*p++ = 0x9C;
/* popq %dst */
- *p++ = toUChar(0x40 + (1 & iregBit3(i->Ain.SseUComIS.dst)));
- *p++ = toUChar(0x58 + iregBits210(i->Ain.SseUComIS.dst));
+ *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.SseUComIS.dst)));
+ *p++ = toUChar(0x58 + iregEnc210(i->Ain.SseUComIS.dst));
goto done;
case Ain_SseSI2SF:
/* cvssi2s[sd] %src, %dst */
- rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
- i->Ain.SseSI2SF.src );
+ rex = rexAMode_R_enc_reg( vregEnc3210(i->Ain.SseSI2SF.dst),
+ i->Ain.SseSI2SF.src );
*p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
*p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
*p++ = 0x0F;
*p++ = 0x2A;
- p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
- i->Ain.SseSI2SF.src );
+ p = doAMode_R_enc_reg( p, vregEnc3210(i->Ain.SseSI2SF.dst),
+ i->Ain.SseSI2SF.src );
goto done;
case Ain_SseSF2SI:
/* cvss[sd]2si %src, %dst */
- rex = rexAMode_R( i->Ain.SseSF2SI.dst,
- vreg2ireg(i->Ain.SseSF2SI.src) );
+ rex = rexAMode_R_reg_enc( i->Ain.SseSF2SI.dst,
+ vregEnc3210(i->Ain.SseSF2SI.src) );
*p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
*p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
*p++ = 0x0F;
*p++ = 0x2D;
- p = doAMode_R( p, i->Ain.SseSF2SI.dst,
- vreg2ireg(i->Ain.SseSF2SI.src) );
+ p = doAMode_R_reg_enc( p, i->Ain.SseSF2SI.dst,
+ vregEnc3210(i->Ain.SseSF2SI.src) );
goto done;
case Ain_SseSDSS:
/* cvtsd2ss/cvtss2sd %src, %dst */
*p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
*p++ = clearWBit(
- rexAMode_R( vreg2ireg(i->Ain.SseSDSS.dst),
- vreg2ireg(i->Ain.SseSDSS.src) ));
+ rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseSDSS.dst),
+ vregEnc3210(i->Ain.SseSDSS.src) ));
*p++ = 0x0F;
*p++ = 0x5A;
- p = doAMode_R( p, vreg2ireg(i->Ain.SseSDSS.dst),
- vreg2ireg(i->Ain.SseSDSS.src) );
+ p = doAMode_R_enc_enc( p, vregEnc3210(i->Ain.SseSDSS.dst),
+ vregEnc3210(i->Ain.SseSDSS.src) );
goto done;
case Ain_SseLdSt:
@@ -3394,10 +3409,12 @@
vassert(0);
}
*p++ = clearWBit(
- rexAMode_M( vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr));
+ rexAMode_M_enc(vregEnc3210(i->Ain.SseLdSt.reg),
+ i->Ain.SseLdSt.addr));
*p++ = 0x0F;
*p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
- p = doAMode_M(p, vreg2ireg(i->Ain.SseLdSt.reg), i->Ain.SseLdSt.addr);
+ p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdSt.reg),
+ i->Ain.SseLdSt.addr);
goto done;
case Ain_SseLdzLO:
@@ -3405,19 +3422,19 @@
/* movs[sd] amode, %xmm-dst */
*p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
*p++ = clearWBit(
- rexAMode_M(vreg2ireg(i->Ain.SseLdzLO.reg),
- i->Ain.SseLdzLO.addr));
+ rexAMode_M_enc(vregEnc3210(i->Ain.SseLdzLO.reg),
+ i->Ain.SseLdzLO.addr));
*p++ = 0x0F;
*p++ = 0x10;
- p = doAMode_M(p, vreg2ireg(i->Ain.SseLdzLO.reg),
- i->Ain.SseLdzLO.addr);
+ p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdzLO.reg),
+ i->Ain.SseLdzLO.addr);
goto done;
case Ain_Sse32Fx4:
xtra = 0;
*p++ = clearWBit(
- rexAMode_R( vreg2ireg(i->Ain.Sse32Fx4.dst),
- vreg2ireg(i->Ain.Sse32Fx4.src) ));
+ rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32Fx4.dst),
+ vregEnc3210(i->Ain.Sse32Fx4.src) ));
*p++ = 0x0F;
switch (i->Ain.Sse32Fx4.op) {
case Asse_ADDF: *p++ = 0x58; break;
@@ -3435,8 +3452,8 @@
case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
default: goto bad;
}
- p = doAMode_R(p, vreg2ireg(i->Ain.Sse32Fx4.dst),
- vreg2ireg(i->Ain.Sse32Fx4.src) );
+ p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32Fx4.dst),
+ vregEnc3210(i->Ain.Sse32Fx4.src) );
if (xtra & 0x100)
*p++ = toUChar(xtra & 0xFF);
goto done;
@@ -3445,8 +3462,8 @@
xtra = 0;
*p++ = 0x66;
*p++ = clearWBit(
- rexAMode_R( vreg2ireg(i->Ain.Sse64Fx2.dst),
- vreg2ireg(i->Ain.Sse64Fx2.src) ));
+ rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64Fx2.dst),
+ vregEnc3210(i->Ain.Sse64Fx2.src) ));
*p++ = 0x0F;
switch (i->Ain.Sse64Fx2.op) {
case Asse_ADDF: *p++ = 0x58; break;
@@ -3462,8 +3479,8 @@
case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
default: goto bad;
}
- p = doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
- vreg2ireg(i->Ain.Sse64Fx2.src) );
+ p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64Fx2.dst),
+ vregEnc3210(i->Ain.Sse64Fx2.src) );
if (xtra & 0x100)
*p++ = toUChar(xtra & 0xFF);
goto done;
@@ -3472,8 +3489,8 @@
xtra = 0;
*p++ = 0xF3;
*p++ = clearWBit(
- rexAMode_R( vreg2ireg(i->Ain.Sse32FLo.dst),
- vreg2ireg(i->Ain.Sse32FLo.src) ));
+ rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32FLo.dst),
+ vregEnc3210(i->Ain.Sse32FLo.src) ));
*p++ = 0x0F;
switch (i->Ain.Sse32FLo.op) {
case Asse_ADDF: *p++ = 0x58; break;
@@ -3491,8 +3508,8 @@
case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
default: goto bad;
}
- p = doAMode_R(p, vreg2ireg(i->Ain.Sse32FLo.dst),
- vreg2ireg(i->Ain.Sse32FLo.src) );
+ p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32FLo.dst),
+ vregEnc3210(i->Ain.Sse32FLo.src) );
if (xtra & 0x100)
*p++ = toUChar(xtra & 0xFF);
goto done;
@@ -3501,8 +3518,8 @@
xtra = 0;
*p++ = 0xF2;
*p++ = clearWBit(
- rexAMode_R( vreg2ireg(i->Ain.Sse64FLo.dst),
- vreg2ireg(i->Ain.Sse64FLo.src) ));
+ rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64FLo.dst),
+ vregEnc3210(i->Ain.Sse64FLo.src) ));
*p++ = 0x0F;
switch (i->Ain.Sse64FLo.op) {
case Asse_ADDF: *p++ = 0x58; break;
@@ -3518,8 +3535,8 @@
case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
default: goto bad;
}
- p = doAMode_R(p, vreg2ireg(i->Ain.Sse64FLo.dst),
- vreg2ireg(i->Ain.Sse64FLo.src) );
+ p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64FLo.dst),
+ vregEnc3210(i->Ain.Sse64FLo.src) );
if (xtra & 0x100)
*p++ = toUChar(xtra & 0xFF);
goto done;
@@ -3528,8 +3545,8 @@
# define XX(_n) *p++ = (_n)
rex = clearWBit(
- rexAMode_R( vreg2ireg(i->Ain.SseReRg.dst),
- vreg2ireg(i->Ain.SseReRg.src) ));
+ rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseReRg.dst),
+ vregEnc3210(i->Ain.SseReRg.src) ));
switch (i->Ain.SseReRg.op) {
case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
@@ -3589,8 +3606,8 @@
case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
default: goto bad;
}
- p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst),
- vreg2ireg(i->Ain.SseReRg.src) );
+ p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseReRg.dst),
+ vregEnc3210(i->Ain.SseReRg.src) );
# undef XX
goto done;
@@ -3602,12 +3619,12 @@
/* movaps %src, %dst */
*p++ = clearWBit(
- rexAMode_R( vreg2ireg(i->Ain.SseCMov.dst),
- vreg2ireg(i->Ain.SseCMov.src) ));
+ rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseCMov.dst),
+ vregEnc3210(i->Ain.SseCMov.src) ));
*p++ = 0x0F;
*p++ = 0x28;
- p = doAMode_R(p, vreg2ireg(i->Ain.SseCMov.dst),
- vreg2ireg(i->Ain.SseCMov.src) );
+ p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseCMov.dst),
+ vregEnc3210(i->Ain.SseCMov.src) );
/* Fill in the jump offset. */
*(ptmp-1) = toUChar(p - ptmp);
@@ -3616,12 +3633,12 @@
case Ain_SseShuf:
*p++ = 0x66;
*p++ = clearWBit(
- rexAMode_R( vreg2ireg(i->Ain.SseShuf.dst),
- vreg2ireg(i->Ain.SseShuf.src) ));
+ rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseShuf.dst),
+ vregEnc3210(i->Ain.SseShuf.src) ));
*p++ = 0x0F;
*p++ = 0x70;
- p = doAMode_R(p, vreg2ireg(i->Ain.SseShuf.dst),
- vreg2ireg(i->Ain.SseShuf.src) );
+ p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseShuf.dst),
+ vregEnc3210(i->Ain.SseShuf.src) );
*p++ = (UChar)(i->Ain.SseShuf.order);
goto done;
@@ -3652,14 +3669,14 @@
/* --- decl 8(%rbp) --- */
/* Need to compute the REX byte for the decl in order to prove
that we don't need it, since this is a 32-bit inc and all
- registers involved in the amode are < r8. "fake(1)" because
+ registers involved in the amode are < r8. "1" because
there's no register in this encoding; instead the register
field is used as a sub opcode. The encoding for "decl r/m32"
- is FF /1, hence the fake(1). */
- rex = clearWBit(rexAMode_M(fake(1), i->Ain.EvCheck.amCounter));
+ is FF /1, hence the "1". */
+ rex = clearWBit(rexAMode_M_enc(1, i->Ain.EvCheck.amCounter));
if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
*p++ = 0xFF;
- p = doAMode_M(p, fake(1), i->Ain.EvCheck.amCounter);
+ p = doAMode_M_enc(p, 1, i->Ain.EvCheck.amCounter);
vassert(p - p0 == 3);
/* --- jns nofail --- */
*p++ = 0x79;
@@ -3669,10 +3686,10 @@
/* Once again, verify we don't need REX. The encoding is FF /4.
We don't need REX.W since by default FF /4 in 64-bit mode
implies a 64 bit load. */
- rex = clearWBit(rexAMode_M(fake(4), i->Ain.EvCheck.amFailAddr));
+ rex = clearWBit(rexAMode_M_enc(4, i->Ain.EvCheck.amFailAddr));
if (rex != 0x40) goto bad;
*p++ = 0xFF;
- p = doAMode_M(p, fake(4), i->Ain.EvCheck.amFailAddr);
+ p = doAMode_M_enc(p, 4, i->Ain.EvCheck.amFailAddr);
vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
/* And crosscheck .. */
vassert(evCheckSzB_AMD64() == 8);
@@ -3710,8 +3727,6 @@
done:
vassert(p - &buf[0] <= 32);
return p - &buf[0];
-
-# undef fake
}
Modified: trunk/priv/host_amd64_defs.h
==============================================================================
--- trunk/priv/host_amd64_defs.h (original)
+++ trunk/priv/host_amd64_defs.h Thu Mar 26 07:18:32 2015
@@ -46,44 +46,42 @@
float regs, and 16 real vector regs.
*/
-extern void ppHRegAMD64 ( HReg );
+#define ST_IN static inline
+ST_IN HReg hregAMD64_RSI ( void ) { return mkHReg(False, HRcInt64, 6, 0); }
+ST_IN HReg hregAMD64_RDI ( void ) { return mkHReg(False, HRcInt64, 7, 1); }
+ST_IN HReg hregAMD64_R8 ( void ) { return mkHReg(False, HRcInt64, 8, 2); }
+ST_IN HReg hregAMD64_R9 ( void ) { return mkHReg(False, HRcInt64, 9, 3); }
+ST_IN HReg hregAMD64_R12 ( void ) { return mkHReg(False, HRcInt64, 12, 4); }
+ST_IN HReg hregAMD64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 5); }
+ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 6); }
+ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 7); }
+ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 8); }
+
+ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 9); }
+ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 10); }
+ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 11); }
+ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 12); }
+ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 13); }
+ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 14); }
+ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 15); }
+ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 16); }
+ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 17); }
+ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 18); }
+
+ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 19); }
+
+ST_IN HReg hregAMD64_RAX ( void ) { return mkHReg(False, HRcInt64, 0, 20); }
+ST_IN HReg hregAMD64_RCX ( void ) { return mkHReg(False, HRcInt64, 1, 21); }
+ST_IN HReg hregAMD64_RDX ( void ) { return mkHReg(False, HRcInt64, 2, 22); }
+ST_IN HReg hregAMD64_RSP ( void ) { return mkHReg(False, HRcInt64, 4, 23); }
+ST_IN HReg hregAMD64_RBP ( void ) { return mkHReg(False, HRcInt64, 5, 24); }
+ST_IN HReg hregAMD64_R11 ( void ) { return mkHReg(False, HRcInt64, 11, 25); }
+
+ST_IN HReg hregAMD64_XMM0 ( void ) { return mkHReg(False, HRcVec128, 0, 26); }
+ST_IN HReg hregAMD64_XMM1 ( void ) { return mkHReg(False, HRcVec128, 1, 27); }
+#undef ST_IN
-extern HReg hregAMD64_RAX ( void );
-extern HReg hregAMD64_RBX ( void );
-extern HReg hregAMD64_RCX ( void );
-extern HReg hregAMD64_RDX ( void );
-extern HReg hregAMD64_RSP ( void );
-extern HReg hregAMD64_RBP ( void );
-extern HReg hregAMD64_RSI ( void );
-extern HReg hregAMD64_RDI ( void );
-extern HReg hregAMD64_R8 ( void );
-extern HReg hregAMD64_R9 ( void );
-extern HReg hregAMD64_R10 ( void );
-extern HReg hregAMD64_R11 ( void );
-extern HReg hregAMD64_R12 ( void );
-extern HReg hregAMD64_R13 ( void );
-extern HReg hregAMD64_R14 ( void );
-extern HReg hregAMD64_R15 ( void );
-
-extern HReg hregAMD64_FAKE0 ( void );
-extern HReg hregAMD64_FAKE1 ( void );
-extern HReg hregAMD64_FAKE2 ( void );
-extern HReg hregAMD64_FAKE3 ( void );
-extern HReg hregAMD64_FAKE4 ( void );
-extern HReg hregAMD64_FAKE5 ( void );
-
-extern HReg hregAMD64_XMM0 ( void );
-extern HReg hregAMD64_XMM1 ( void );
-extern HReg hregAMD64_XMM3 ( void );
-extern HReg hregAMD64_XMM4 ( void );
-extern HReg hregAMD64_XMM5 ( void );
-extern HReg hregAMD64_XMM6 ( void );
-extern HReg hregAMD64_XMM7 ( void );
-extern HReg hregAMD64_XMM8 ( void );
-extern HReg hregAMD64_XMM9 ( void );
-extern HReg hregAMD64_XMM10 ( void );
-extern HReg hregAMD64_XMM11 ( void );
-extern HReg hregAMD64_XMM12 ( void );
+extern void ppHRegAMD64 ( HReg );
/* --------- Condition codes, AMD encoding. --------- */
@@ -790,7 +788,8 @@
extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
-extern void getAllocableRegs_AMD64 ( Int*, HReg** );
+extern const RRegUniverse* getRRegUniverse_AMD64 ( void );
+
extern HInstrArray* iselSB_AMD64 ( const IRSB*,
VexArch,
const VexArchInfo*,
Modified: trunk/priv/host_amd64_isel.c
==============================================================================
--- trunk/priv/host_amd64_isel.c (original)
+++ trunk/priv/host_amd64_isel.c Thu Mar 26 07:18:32 2015
@@ -193,14 +193,14 @@
static HReg newVRegI ( ISelEnv* env )
{
- HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
+ HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0/*enc*/, env->vreg_ctr);
env->vreg_ctr++;
return reg;
}
static HReg newVRegV ( ISelEnv* env )
{
- HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
+ HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
env->vreg_ctr++;
return reg;
}
@@ -4951,20 +4951,20 @@
switch (bb->tyenv->types[i]) {
case Ity_I1:
case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
- hreg = mkHReg(j++, HRcInt64, True);
+ hreg = mkHReg(True, HRcInt64, 0, j++);
break;
case Ity_I128:
- hreg = mkHReg(j++, HRcInt64, True);
- hregHI = mkHReg(j++, HRcInt64, True);
+ hreg = mkHReg(True, HRcInt64, 0, j++);
+ hregHI = mkHReg(True, HRcInt64, 0, j++);
break;
case Ity_F32:
case Ity_F64:
case Ity_V128:
- hreg = mkHReg(j++, HRcVec128, True);
+ hreg = mkHReg(True, HRcVec128, 0, j++);
break;
case Ity_V256:
- hreg = mkHReg(j++, HRcVec128, True);
- hregHI = mkHReg(j++, HRcVec128, True);
+ hreg = mkHReg(True, HRcVec128, 0, j++);
+ hregHI = mkHReg(True, HRcVec128, 0, j++);
break;
default:
ppIRType(bb->tyenv->types[i]);
Modified: trunk/priv/host_arm64_defs.c
==============================================================================
--- trunk/priv/host_arm64_defs.c (original)
+++ trunk/priv/host_arm64_defs.c Thu Mar 26 07:18:32 2015
@@ -36,8 +36,6 @@
#include "host_generic_regs.h"
#include "host_arm64_defs.h"
-//ZZ UInt arm_hwcaps = 0;
-
/* --------- Registers. --------- */
@@ -47,129 +45,71 @@
Q regs (128 bit vector)
*/
-void ppHRegARM64 ( HReg reg ) {
- Int r;
- /* Be generic for all virtual regs. */
- if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
- }
- /* But specific for real regs. */
- switch (hregClass(reg)) {
- case HRcInt64:
- r = hregNumber(reg);
- vassert(r >= 0 && r < 31);
- vex_printf("x%d", r);
- return;
- case HRcFlt64:
- r = hregNumber(reg);
- vassert(r >= 0 && r < 32);
- vex_printf("d%d", r);
- return;
- case HRcVec128:
- r = hregNumber(reg);
- vassert(r >= 0 && r < 32);
- vex_printf("q%d", r);
- return;
- default:
- vpanic("ppHRegARM64");
- }
-}
-
-static void ppHRegARM64asSreg ( HReg reg ) {
- ppHRegARM64(reg);
- vex_printf("(S-reg)");
-}
-
-HReg hregARM64_X0 ( void ) { return mkHReg(0, HRcInt64, False); }
-HReg hregARM64_X1 ( void ) { return mkHReg(1, HRcInt64, False); }
-HReg hregARM64_X2 ( void ) { return mkHReg(2, HRcInt64, False); }
-HReg hregARM64_X3 ( void ) { return mkHReg(3, HRcInt64, False); }
-HReg hregARM64_X4 ( void ) { return mkHReg(4, HRcInt64, False); }
-HReg hregARM64_X5 ( void ) { return mkHReg(5, HRcInt64, False); }
-HReg hregARM64_X6 ( void ) { return mkHReg(6, HRcInt64, False); }
-HReg hregARM64_X7 ( void ) { return mkHReg(7, HRcInt64, False); }
-HReg hregARM64_X8 ( void ) { return mkHReg(8, HRcInt64, False); }
-HReg hregARM64_X9 ( void ) { return mkHReg(9, HRcInt64, False); }
-HReg hregARM64_X10 ( void ) { return mkHReg(10, HRcInt64, False); }
-HReg hregARM64_X11 ( void ) { return mkHReg(11, HRcInt64, False); }
-HReg hregARM64_X12 ( void ) { return mkHReg(12, HRcInt64, False); }
-HReg hregARM64_X13 ( void ) { return mkHReg(13, HRcInt64, False); }
-HReg hregARM64_X14 ( void ) { return mkHReg(14, HRcInt64, False); }
-HReg hregARM64_X15 ( void ) { return mkHReg(15, HRcInt64, False); }
-HReg hregARM64_X21 ( void ) { return mkHReg(21, HRcInt64, False); }
-HReg hregARM64_X22 ( void ) { return mkHReg(22, HRcInt64, False); }
-HReg hregARM64_X23 ( void ) { return mkHReg(23, HRcInt64, False); }
-HReg hregARM64_X24 ( void ) { return mkHReg(24, HRcInt64, False); }
-HReg hregARM64_X25 ( void ) { return mkHReg(25, HRcInt64, False); }
-HReg hregARM64_X26 ( void ) { return mkHReg(26, HRcInt64, False); }
-HReg hregARM64_X27 ( void ) { return mkHReg(27, HRcInt64, False); }
-HReg hregARM64_X28 ( void ) { return mkHReg(28, HRcInt64, False); }
-
-// Should really use D8 .. D15 for class F64, since they are callee
-// save
-HReg hregARM64_D8 ( void ) { return mkHReg(8, HRcFlt64, False); }
-HReg hregARM64_D9 ( void ) { return mkHReg(9, HRcFlt64, False); }
-HReg hregARM64_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
-HReg hregARM64_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
-HReg hregARM64_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
-HReg hregARM64_D13 ( void ) { return mkHReg(13, HRcFlt64, False); }
-HReg hregARM64_Q16 ( void ) { return mkHReg(16, HRcVec128, False); }
-HReg hregARM64_Q17 ( void ) { return mkHReg(17, HRcVec128, False); }
-HReg hregARM64_Q18 ( void ) { return mkHReg(18, HRcVec128, False); }
-HReg hregARM64_Q19 ( void ) { return mkHReg(19, HRcVec128, False); }
-HReg hregARM64_Q20 ( void ) { return mkHReg(20, HRcVec128, False); }
-
-void getAllocableRegs_ARM64 ( Int* nregs, HReg** arr )
+const RRegUniverse* getRRegUniverse_ARM64 ( void )
{
- Int i = 0;
- *nregs = 26;
- *arr = LibVEX_Alloc_inline(*nregs * sizeof(HReg));
-
- // callee saves ones (22 to 28) are listed first, since we prefer
- // them if they're available
- (*arr)[i++] = hregARM64_X22();
- (*arr)[i++] = hregARM64_X23();
- (*arr)[i++] = hregARM64_X24();
- (*arr)[i++] = hregARM64_X25();
- (*arr)[i++] = hregARM64_X26();
- (*arr)[i++] = hregARM64_X27();
- (*arr)[i++] = hregARM64_X28();
-
- (*arr)[i++] = hregARM64_X0();
- (*arr)[i++] = hregARM64_X1();
- (*arr)[i++] = hregARM64_X2();
- (*arr)[i++] = hregARM64_X3();
- (*arr)[i++] = hregARM64_X4();
- (*arr)[i++] = hregARM64_X5();
- (*arr)[i++] = hregARM64_X6();
- (*arr)[i++] = hregARM64_X7();
+ /* The real-register universe is a big constant, so we just want to
+ initialise it once. */
+ static RRegUniverse rRegUniverse_ARM64;
+ static Bool rRegUniverse_ARM64_initted = False;
+
+ /* Handy shorthand, nothing more */
+ RRegUniverse* ru = &rRegUniverse_ARM64;
+
+ /* This isn't thread-safe. Sigh. */
+ if (LIKELY(rRegUniverse_ARM64_initted))
+ return ru;
+
+ RRegUniverse__init(ru);
+
+ /* Add the registers. The initial segment of this array must be
+ those available for allocation by reg-alloc, and those that
+ follow are not available for allocation. */
+
+ ru->regs[ru->size++] = hregARM64_X22();
+ ru->regs[ru->size++] = hregARM64_X23();
+ ru->regs[ru->size++] = hregARM64_X24();
+ ru->regs[ru->size++] = hregARM64_X25();
+ ru->regs[ru->size++] = hregARM64_X26();
+ ru->regs[ru->size++] = hregARM64_X27();
+ ru->regs[ru->size++] = hregARM64_X28();
+
+ ru->regs[ru->size++] = hregARM64_X0();
+ ru->regs[ru->size++] = hregARM64_X1();
+ ru->regs[ru->size++] = hregARM64_X2();
+ ru->regs[ru->size++] = hregARM64_X3();
+ ru->regs[ru->size++] = hregARM64_X4();
+ ru->regs[ru->size++] = hregARM64_X5();
+ ru->regs[ru->size++] = hregARM64_X6();
+ ru->regs[ru->size++] = hregARM64_X7();
// X8 is used as a ProfInc temporary, not available to regalloc.
// X9 is a chaining/spill temporary, not available to regalloc.
// Do we really need all these?
- //(*arr)[i++] = hregARM64_X10();
- //(*arr)[i++] = hregARM64_X11();
- //(*arr)[i++] = hregARM64_X12();
- //(*arr)[i++] = hregARM64_X13();
- //(*arr)[i++] = hregARM64_X14();
- //(*arr)[i++] = hregARM64_X15();
+ //ru->regs[ru->size++] = hregARM64_X10();
+ //ru->regs[ru->size++] = hregARM64_X11();
+ //ru->regs[ru->size++] = hregARM64_X12();
+ //ru->regs[ru->size++] = hregARM64_X13();
+ //ru->regs[ru->size++] = hregARM64_X14();
+ //ru->regs[ru->size++] = hregARM64_X15();
// X21 is the guest state pointer, not available to regalloc.
// vector regs. Unfortunately not callee-saved.
- (*arr)[i++] = hregARM64_Q16();
- (*arr)[i++] = hregARM64_Q17();
- (*arr)[i++] = hregARM64_Q18();
- (*arr)[i++] = hregARM64_Q19();
- (*arr)[i++] = hregARM64_Q20();
+ ru->regs[ru->size++] = hregARM64_Q16();
+ ru->regs[ru->size++] = hregARM64_Q17();
+ ru->regs[ru->size++] = hregARM64_Q18();
+ ru->regs[ru->size++] = hregARM64_Q19();
+ ru->regs[ru->size++] = hregARM64_Q20();
// F64 regs, all of which are callee-saved
- (*arr)[i++] = hregARM64_D8();
- (*arr)[i++] = hregARM64_D9();
- (*arr)[i++] = hregARM64_D10();
- (*arr)[i++] = hregARM64_D11();
- (*arr)[i++] = hregARM64_D12();
- (*arr)[i++] = hregARM64_D13();
+ ru->regs[ru->size++] = hregARM64_D8();
+ ru->regs[ru->size++] = hregARM64_D9();
+ ru->regs[ru->size++] = hregARM64_D10();
+ ru->regs[ru->size++] = hregARM64_D11();
+ ru->regs[ru->size++] = hregARM64_D12();
+ ru->regs[ru->size++] = hregARM64_D13();
+
+ ru->allocable = ru->size;
+ /* And other regs, not available to the allocator. */
// unavail: x21 as GSP
// x8 is used as a ProfInc temporary
@@ -190,7 +130,50 @@
// If the set of available registers changes or if the e/r status
// changes, be sure to re-check/sync the definition of
// getRegUsage for ARM64Instr_Call too.
- vassert(i == *nregs);
+
+ ru->regs[ru->size++] = hregARM64_X8();
+ ru->regs[ru->size++] = hregARM64_X9();
+ ru->regs[ru->size++] = hregARM64_X21();
+
+ rRegUniverse_ARM64_initted = True;
+
+ RRegUniverse__check_is_sane(ru);
+ return ru;
+}
+
+
+void ppHRegARM64 ( HReg reg ) {
+ Int r;
+ /* Be generic for all virtual regs. */
+ if (hregIsVirtual(reg)) {
+ ppHReg(reg);
+ return;
+ }
+ /* But specific for real regs. */
+ switch (hregClass(reg)) {
+ case HRcInt64:
+ r = hregEncoding(reg);
+ vassert(r >= 0 && r < 31);
+ vex_printf("x%...
[truncated message content] |