|
From: <sv...@va...> - 2012-05-27 16:18:26
|
sewardj 2012-05-27 17:18:13 +0100 (Sun, 27 May 2012)
New Revision: 2355
Log:
Remove, or (where it might later come in handy) comment out artefacts
for 256 bit (AVX) code generation on amd64. Although that was the
plan at first, it turns out to be infeasible to generate 256 bit
instructions for the IR created by Memcheck's instrumentation of 256
bit Ity_V256 IR. This is because it would require 256 bit integer
SIMD operations, and AVX as currently available only provides 256 bit
operations for floating point. So, fall back to generating 256 bit IR
into 128-bit XMM register pairs, and using the existing SSE facilities
in the back end. This change only affects the amd64 back end -- it
does not affect IR, which remains unchanged, and capable of
representing 256 bit vector operations wherever needed.
Modified files:
trunk/priv/host_amd64_defs.c
trunk/priv/host_amd64_defs.h
trunk/priv/host_amd64_isel.c
trunk/priv/host_generic_reg_alloc2.c
trunk/priv/host_generic_regs.c
trunk/priv/host_generic_regs.h
Modified: trunk/priv/host_generic_reg_alloc2.c (+0 -25)
===================================================================
--- trunk/priv/host_generic_reg_alloc2.c 2012-05-27 14:50:42 +01:00 (rev 2354)
+++ trunk/priv/host_generic_reg_alloc2.c 2012-05-27 17:18:13 +01:00 (rev 2355)
@@ -208,8 +208,6 @@
static inline void sanity_check_spill_offset ( VRegLR* vreg )
{
switch (vreg->reg_class) {
- case HRcVec256:
- vassert(0 == ((UShort)vreg->spill_offset % 32)); break;
case HRcVec128: case HRcFlt64:
vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
default:
@@ -838,29 +836,6 @@
HRegClass. */
switch (vreg_lrs[j].reg_class) {
- case HRcVec256:
- /* Find four adjacent free slots in which between them
- provide 256 bits in which to spill the vreg. Since we
- are trying to find an 32-byte-aligned slot, move along
- in steps of 4 (slots). */
- for (k = 0; k < N_SPILL64S-3; k += 4)
- if (ss_busy_until_before[k+0] <= vreg_lrs[j].live_after
- && ss_busy_until_before[k+1] <= vreg_lrs[j].live_after
- && ss_busy_until_before[k+2] <= vreg_lrs[j].live_after
- && ss_busy_until_before[k+3] <= vreg_lrs[j].live_after)
- break;
- if (k >= N_SPILL64S-3) {
- vpanic("LibVEX_N_SPILL_BYTES is too low. "
- "Increase and recompile.");
- }
- if (0) vex_printf("32-byte spill offset in spill slot %d\n",
- (Int)k);
- ss_busy_until_before[k+0] = vreg_lrs[j].dead_before;
- ss_busy_until_before[k+1] = vreg_lrs[j].dead_before;
- ss_busy_until_before[k+2] = vreg_lrs[j].dead_before;
- ss_busy_until_before[k+3] = vreg_lrs[j].dead_before;
- break;
-
case HRcVec128: case HRcFlt64:
/* Find two adjacent free slots in which between them
provide up to 128 bits in which to spill the vreg.
Modified: trunk/priv/host_amd64_isel.c (+0 -75)
===================================================================
--- trunk/priv/host_amd64_isel.c 2012-05-27 14:50:42 +01:00 (rev 2354)
+++ trunk/priv/host_amd64_isel.c 2012-05-27 17:18:13 +01:00 (rev 2355)
@@ -196,14 +196,7 @@
return reg;
}
-static HReg newVRegDV ( ISelEnv* env )
-{
- HReg reg = mkHReg(env->vreg_ctr, HRcVec256, True/*virtual reg*/);
- env->vreg_ctr++;
- return reg;
-}
-
/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations ---*/
/*---------------------------------------------------------*/
@@ -246,9 +239,6 @@
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e );
static HReg iselVecExpr ( ISelEnv* env, IRExpr* e );
-static HReg iselV256Expr_wrk ( ISelEnv* env, IRExpr* e );
-static HReg iselV256Expr ( ISelEnv* env, IRExpr* e );
-
static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
ISelEnv* env, IRExpr* e );
static void iselDVecExpr ( /*OUT*/HReg* rHi, HReg* rLo,
@@ -325,15 +315,6 @@
return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}
-/* Make a double-vector (256 bit) reg-reg move. */
-
-static AMD64Instr* mk_dvMOVsd_RR ( HReg src, HReg dst )
-{
- vassert(hregClass(src) == HRcVec256);
- vassert(hregClass(dst) == HRcVec256);
- return AMD64Instr_AvxReRg(Asse_MOV, src, dst);
-}
-
/* Advance/retreat %rsp by n. */
static void add_to_rsp ( ISelEnv* env, Int n )
@@ -3381,62 +3362,6 @@
/*---------------------------------------------------------*/
-/*--- ISEL: SIMD (V256) expressions, 256 bit. ---*/
-/*---------------------------------------------------------*/
-
-static HReg iselV256Expr ( ISelEnv* env, IRExpr* e )
-{
- HReg r = iselV256Expr_wrk( env, e );
-# if 0
- vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
-# endif
- vassert(hregClass(r) == HRcVec256);
- vassert(hregIsVirtual(r));
- return r;
-}
-
-
-/* DO NOT CALL THIS DIRECTLY */
-static HReg iselV256Expr_wrk ( ISelEnv* env, IRExpr* e )
-{
- //HWord fn = 0; /* address of helper fn, if required */
- //Bool arg1isEReg = False;
- //AMD64SseOp op = Asse_INVALID;
- IRType ty = typeOfIRExpr(env->type_env,e);
- vassert(e);
- vassert(ty == Ity_V256);
-#if 0
- if (e->tag == Iex_RdTmp) {
- return lookupIRTemp(env, e->Iex.RdTmp.tmp);
- }
-
- if (e->tag == Iex_Get) {
- HReg dst = newVRegDV(env);
- addInstr(env, AMD64Instr_AvxLdSt(
- True/*load*/,
- dst,
- AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
- )
- );
- return dst;
- }
-
- if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
- HReg dst = newVRegDV(env);
- AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
- addInstr(env, AMD64Instr_AvxLdSt( True/*load*/, dst, am ));
- return dst;
- }
-#endif
- //avx_fail:
- vex_printf("iselV256Expr (amd64, subarch = %s): can't reduce\n",
- LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
- ppIRExpr(e);
- vpanic("iselV256Expr_wrk");
-}
-
-
-/*---------------------------------------------------------*/
/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs. --*/
/*---------------------------------------------------------*/
Modified: trunk/priv/host_generic_regs.h (+2 -4)
===================================================================
--- trunk/priv/host_generic_regs.h 2012-05-27 14:50:42 +01:00 (rev 2354)
+++ trunk/priv/host_generic_regs.h 2012-05-27 17:18:13 +01:00 (rev 2355)
@@ -87,7 +87,6 @@
so won't fit in a 64-bit slot)
HRcVec64 64 bits
HRcVec128 128 bits
- HRcVec256 256 bits
If you add another regclass, you must remember to update
host_generic_reg_alloc2.c accordingly.
@@ -100,8 +99,7 @@
HRcFlt32=5, /* 32-bit float */
HRcFlt64=6, /* 64-bit float */
HRcVec64=7, /* 64-bit SIMD */
- HRcVec128=8, /* 128-bit SIMD */
- HRcVec256=9
+ HRcVec128=8 /* 128-bit SIMD */
}
HRegClass;
@@ -124,7 +122,7 @@
static inline HRegClass hregClass ( HReg r ) {
UInt rc = r;
rc = (rc >> 28) & 0x0F;
- vassert(rc >= HRcInt32 && rc <= HRcVec256);
+ vassert(rc >= HRcInt32 && rc <= HRcVec128);
return (HRegClass)rc;
}
Modified: trunk/priv/host_generic_regs.c (+0 -2)
===================================================================
--- trunk/priv/host_generic_regs.c 2012-05-27 14:50:42 +01:00 (rev 2354)
+++ trunk/priv/host_generic_regs.c 2012-05-27 17:18:13 +01:00 (rev 2355)
@@ -49,7 +49,6 @@
case HRcFlt64: vex_printf("HRcFlt64"); break;
case HRcVec64: vex_printf("HRcVec64"); break;
case HRcVec128: vex_printf("HRcVec128"); break;
- case HRcVec256: vex_printf("HRcVec256"); break;
default: vpanic("ppHRegClass");
}
}
@@ -66,7 +65,6 @@
case HRcFlt64: vex_printf("%%%sD%d", maybe_v, regNo); return;
case HRcVec64: vex_printf("%%%sv%d", maybe_v, regNo); return;
case HRcVec128: vex_printf("%%%sV%d", maybe_v, regNo); return;
- case HRcVec256: vex_printf("%%%sDV%d", maybe_v, regNo); return;
default: vpanic("ppHReg");
}
}
Modified: trunk/priv/host_amd64_defs.h (+15 -20)
===================================================================
--- trunk/priv/host_amd64_defs.h 2012-05-27 14:50:42 +01:00 (rev 2354)
+++ trunk/priv/host_amd64_defs.h 2012-05-27 17:18:13 +01:00 (rev 2355)
@@ -82,12 +82,7 @@
extern HReg hregAMD64_XMM11 ( void );
extern HReg hregAMD64_XMM12 ( void );
-extern HReg hregAMD64_YMM2 ( void );
-extern HReg hregAMD64_YMM13 ( void );
-extern HReg hregAMD64_YMM14 ( void );
-extern HReg hregAMD64_YMM15 ( void );
-
/* --------- Condition codes, AMD encoding. --------- */
typedef
@@ -400,9 +395,9 @@
Ain_SseReRg, /* SSE binary general reg-reg, Re, Rg */
Ain_SseCMov, /* SSE conditional move */
Ain_SseShuf, /* SSE2 shuffle (pshufd) */
- Ain_AvxLdSt, /* AVX load/store 256 bits,
- no alignment constraints */
- Ain_AvxReRg, /* AVX binary general reg-reg, Re, Rg */
+ //uu Ain_AvxLdSt, /* AVX load/store 256 bits,
+ //uu no alignment constraints */
+ //uu Ain_AvxReRg, /* AVX binary general reg-reg, Re, Rg */
Ain_EvCheck, /* Event check */
Ain_ProfInc /* 64-bit profile counter increment */
}
@@ -668,17 +663,17 @@
HReg src;
HReg dst;
} SseShuf;
+ //uu struct {
+ //uu Bool isLoad;
+ //uu HReg reg;
+ //uu AMD64AMode* addr;
+ //uu } AvxLdSt;
+ //uu struct {
+ //uu AMD64SseOp op;
+ //uu HReg src;
+ //uu HReg dst;
+ //uu } AvxReRg;
struct {
- Bool isLoad;
- HReg reg;
- AMD64AMode* addr;
- } AvxLdSt;
- struct {
- AMD64SseOp op;
- HReg src;
- HReg dst;
- } AvxReRg;
- struct {
AMD64AMode* amCounter;
AMD64AMode* amFailAddr;
} EvCheck;
@@ -740,8 +735,8 @@
extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg );
extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad, HReg, AMD64AMode* );
-extern AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp, HReg, HReg );
+//uu extern AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad, HReg, AMD64AMode* );
+//uu extern AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp, HReg, HReg );
extern AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
AMD64AMode* amFailAddr );
extern AMD64Instr* AMD64Instr_ProfInc ( void );
Modified: trunk/priv/host_amd64_defs.c (+166 -183)
===================================================================
--- trunk/priv/host_amd64_defs.c 2012-05-27 14:50:42 +01:00 (rev 2354)
+++ trunk/priv/host_amd64_defs.c 2012-05-27 17:18:13 +01:00 (rev 2355)
@@ -72,11 +72,6 @@
vassert(r >= 0 && r < 16);
vex_printf("%%xmm%d", r);
return;
- case HRcVec256:
- r = hregNumber(reg);
- vassert(r >= 0 && r < 16);
- vex_printf("%%ymm%d", r);
- return;
default:
vpanic("ppHRegAMD64");
}
@@ -136,12 +131,7 @@
HReg hregAMD64_XMM11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregAMD64_XMM12 ( void ) { return mkHReg(12, HRcVec128, False); }
-HReg hregAMD64_YMM2 ( void ) { return mkHReg( 2, HRcVec256, False); }
-HReg hregAMD64_YMM13 ( void ) { return mkHReg(13, HRcVec256, False); }
-HReg hregAMD64_YMM14 ( void ) { return mkHReg(14, HRcVec256, False); }
-HReg hregAMD64_YMM15 ( void ) { return mkHReg(15, HRcVec256, False); }
-
void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
{
#if 0
@@ -986,23 +976,23 @@
vassert(order >= 0 && order <= 0xFF);
return i;
}
-AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
- HReg reg, AMD64AMode* addr ) {
- AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
- i->tag = Ain_AvxLdSt;
- i->Ain.AvxLdSt.isLoad = isLoad;
- i->Ain.AvxLdSt.reg = reg;
- i->Ain.AvxLdSt.addr = addr;
- return i;
-}
-AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
- AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
- i->tag = Ain_AvxReRg;
- i->Ain.AvxReRg.op = op;
- i->Ain.AvxReRg.src = re;
- i->Ain.AvxReRg.dst = rg;
- return i;
-}
+//uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
+//uu HReg reg, AMD64AMode* addr ) {
+//uu AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//uu i->tag = Ain_AvxLdSt;
+//uu i->Ain.AvxLdSt.isLoad = isLoad;
+//uu i->Ain.AvxLdSt.reg = reg;
+//uu i->Ain.AvxLdSt.addr = addr;
+//uu return i;
+//uu }
+//uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
+//uu AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+//uu i->tag = Ain_AvxReRg;
+//uu i->Ain.AvxReRg.op = op;
+//uu i->Ain.AvxReRg.src = re;
+//uu i->Ain.AvxReRg.dst = rg;
+//uu return i;
+//uu }
AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
AMD64AMode* amFailAddr ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
@@ -1298,25 +1288,24 @@
vex_printf(",");
ppHRegAMD64(i->Ain.SseShuf.dst);
return;
-
- case Ain_AvxLdSt:
- vex_printf("vmovups ");
- if (i->Ain.AvxLdSt.isLoad) {
- ppAMD64AMode(i->Ain.AvxLdSt.addr);
- vex_printf(",");
- ppHRegAMD64(i->Ain.AvxLdSt.reg);
- } else {
- ppHRegAMD64(i->Ain.AvxLdSt.reg);
- vex_printf(",");
- ppAMD64AMode(i->Ain.AvxLdSt.addr);
- }
- return;
- case Ain_AvxReRg:
- vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
- ppHRegAMD64(i->Ain.AvxReRg.src);
- vex_printf(",");
- ppHRegAMD64(i->Ain.AvxReRg.dst);
- return;
+ //uu case Ain_AvxLdSt:
+ //uu vex_printf("vmovups ");
+ //uu if (i->Ain.AvxLdSt.isLoad) {
+ //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
+ //uu vex_printf(",");
+ //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
+ //uu } else {
+ //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
+ //uu vex_printf(",");
+ //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
+ //uu }
+ //uu return;
+ //uu case Ain_AvxReRg:
+ //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
+ //uu ppHRegAMD64(i->Ain.AvxReRg.src);
+ //uu vex_printf(",");
+ //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
+ //uu return;
case Ain_EvCheck:
vex_printf("(evCheck) decl ");
ppAMD64AMode(i->Ain.EvCheck.amCounter);
@@ -1402,7 +1391,7 @@
/* First off, claim it trashes all the caller-saved regs
which fall within the register allocator's jurisdiction.
These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
- and all the xmm/ymm registers.
+ and all the xmm registers.
*/
addHRegUse(u, HRmWrite, hregAMD64_RAX());
addHRegUse(u, HRmWrite, hregAMD64_RCX());
@@ -1425,10 +1414,6 @@
addHRegUse(u, HRmWrite, hregAMD64_XMM10());
addHRegUse(u, HRmWrite, hregAMD64_XMM11());
addHRegUse(u, HRmWrite, hregAMD64_XMM12());
- addHRegUse(u, HRmWrite, hregAMD64_YMM2());
- addHRegUse(u, HRmWrite, hregAMD64_YMM13());
- addHRegUse(u, HRmWrite, hregAMD64_YMM14());
- addHRegUse(u, HRmWrite, hregAMD64_YMM15());
/* Now we have to state any parameter-carrying registers
which might be read. This depends on the regparmness. */
@@ -1609,24 +1594,24 @@
addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
return;
- case Ain_AvxLdSt:
- addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
- addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
- i->Ain.AvxLdSt.reg);
- return;
- case Ain_AvxReRg:
- if ( (i->Ain.AvxReRg.op == Asse_XOR
- || i->Ain.AvxReRg.op == Asse_CMPEQ32)
- && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
- /* See comments on the case for Ain_SseReRg. */
- addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
- } else {
- addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
- addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
- ? HRmWrite : HRmModify,
- i->Ain.AvxReRg.dst);
- }
- return;
+ //uu case Ain_AvxLdSt:
+ //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
+ //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
+ //uu i->Ain.AvxLdSt.reg);
+ //uu return;
+ //uu case Ain_AvxReRg:
+ //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
+ //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
+ //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
+ //uu /* See comments on the case for Ain_SseReRg. */
+ //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
+ //uu } else {
+ //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
+ //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
+ //uu ? HRmWrite : HRmModify,
+ //uu i->Ain.AvxReRg.dst);
+ //uu }
+ //uu return;
case Ain_EvCheck:
/* We expect both amodes only to mention %rbp, so this is in
fact pointless, since %rbp isn't allocatable, but anyway.. */
@@ -1802,14 +1787,14 @@
mapReg(m, &i->Ain.SseShuf.src);
mapReg(m, &i->Ain.SseShuf.dst);
return;
- case Ain_AvxLdSt:
- mapReg(m, &i->Ain.AvxLdSt.reg);
- mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
- break;
- case Ain_AvxReRg:
- mapReg(m, &i->Ain.AvxReRg.src);
- mapReg(m, &i->Ain.AvxReRg.dst);
- return;
+ //uu case Ain_AvxLdSt:
+ //uu mapReg(m, &i->Ain.AvxLdSt.reg);
+ //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
+ //uu break;
+ //uu case Ain_AvxReRg:
+ //uu mapReg(m, &i->Ain.AvxReRg.src);
+ //uu mapReg(m, &i->Ain.AvxReRg.dst);
+ //uu return;
case Ain_EvCheck:
/* We expect both amodes only to mention %rbp, so this is in
fact pointless, since %rbp isn't allocatable, but anyway.. */
@@ -1848,13 +1833,13 @@
*src = i->Ain.SseReRg.src;
*dst = i->Ain.SseReRg.dst;
return True;
- case Ain_AvxReRg:
- /* Moves between AVX regs */
- if (i->Ain.AvxReRg.op != Asse_MOV)
- return False;
- *src = i->Ain.AvxReRg.src;
- *dst = i->Ain.AvxReRg.dst;
- return True;
+ //uu case Ain_AvxReRg:
+ //uu /* Moves between AVX regs */
+ //uu if (i->Ain.AvxReRg.op != Asse_MOV)
+ //uu return False;
+ //uu *src = i->Ain.AvxReRg.src;
+ //uu *dst = i->Ain.AvxReRg.dst;
+ //uu return True;
default:
return False;
}
@@ -1882,9 +1867,6 @@
case HRcVec128:
*i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
return;
- case HRcVec256:
- *i1 = AMD64Instr_AvxLdSt ( False/*store*/, rreg, am );
- return;
default:
ppHRegClass(hregClass(rreg));
vpanic("genSpill_AMD64: unimplemented regclass");
@@ -1907,9 +1889,6 @@
case HRcVec128:
*i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
return;
- case HRcVec256:
- *i1 = AMD64Instr_AvxLdSt ( True/*load*/, rreg, am );
- return;
default:
ppHRegClass(hregClass(rreg));
vpanic("genReload_AMD64: unimplemented regclass");
@@ -1967,16 +1946,16 @@
return mkHReg(n, HRcInt64, False);
}
-/* Ditto for ymm regs. */
-static UInt dvreg2ireg ( HReg r )
-{
- UInt n;
- vassert(hregClass(r) == HRcVec256);
- vassert(!hregIsVirtual(r));
- n = hregNumber(r);
- vassert(n <= 15);
- return mkHReg(n, HRcInt64, False);
-}
+//uu /* Ditto for ymm regs. */
+//uu static UInt dvreg2ireg ( HReg r )
+//uu {
+//uu UInt n;
+//uu vassert(hregClass(r) == HRcVec256);
+//uu vassert(!hregIsVirtual(r));
+//uu n = hregNumber(r);
+//uu vassert(n <= 15);
+//uu return mkHReg(n, HRcInt64, False);
+//uu }
static UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem )
{
@@ -2180,83 +2159,87 @@
}
-/* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
- notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
- in verbatim. There's no range checking on the bits. */
-static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
- UInt mmmmm, UInt rexW, UInt notVvvv,
- UInt L, UInt pp )
-{
- UChar byte0 = 0;
- UChar byte1 = 0;
- UChar byte2 = 0;
- if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
- /* 2 byte encoding is possible. */
- byte0 = 0xC5;
- byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
- | (L << 2) | pp;
- } else {
- /* 3 byte encoding is needed. */
- byte0 = 0xC4;
- byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
- | ((rexB ^ 1) << 5) | mmmmm;
- byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
- }
- return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
-}
+//uu /* May 2012: this VEX prefix stuff is currently unused, but has
+//uu verified correct (I reckon). Certainly it has been known to
+//uu produce correct VEX prefixes during testing. */
+//uu
+//uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
+//uu notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
+//uu in verbatim. There's no range checking on the bits. */
+//uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
+//uu UInt mmmmm, UInt rexW, UInt notVvvv,
+//uu UInt L, UInt pp )
+//uu {
+//uu UChar byte0 = 0;
+//uu UChar byte1 = 0;
+//uu UChar byte2 = 0;
+//uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
+//uu /* 2 byte encoding is possible. */
+//uu byte0 = 0xC5;
+//uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
+//uu | (L << 2) | pp;
+//uu } else {
+//uu /* 3 byte encoding is needed. */
+//uu byte0 = 0xC4;
+//uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
+//uu | ((rexB ^ 1) << 5) | mmmmm;
+//uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
+//uu }
+//uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
+//uu }
+//uu
+//uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
+//uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
+//uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
+//uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
+//uu vvvv=1111 (unused 3rd reg). */
+//uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
+//uu {
+//uu UChar L = 1; /* size = 256 */
+//uu UChar pp = 0; /* no SIMD prefix */
+//uu UChar mmmmm = 1; /* 0F */
+//uu UChar notVvvv = 0; /* unused */
+//uu UChar rexW = 0;
+//uu UChar rexR = 0;
+//uu UChar rexX = 0;
+//uu UChar rexB = 0;
+//uu /* Same logic as in rexAMode_M. */
+//uu if (am->tag == Aam_IR) {
+//uu rexR = iregBit3(greg);
+//uu rexX = 0; /* not relevant */
+//uu rexB = iregBit3(am->Aam.IR.reg);
+//uu }
+//uu else if (am->tag == Aam_IRRS) {
+//uu rexR = iregBit3(greg);
+//uu rexX = iregBit3(am->Aam.IRRS.index);
+//uu rexB = iregBit3(am->Aam.IRRS.base);
+//uu } else {
+//uu vassert(0);
+//uu }
+//uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
+//uu }
+//uu
+//uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
+//uu {
+//uu switch (vex & 0xFF) {
+//uu case 0xC5:
+//uu *p++ = 0xC5;
+//uu *p++ = (vex >> 8) & 0xFF;
+//uu vassert(0 == (vex >> 16));
+//uu break;
+//uu case 0xC4:
+//uu *p++ = 0xC4;
+//uu *p++ = (vex >> 8) & 0xFF;
+//uu *p++ = (vex >> 16) & 0xFF;
+//uu vassert(0 == (vex >> 24));
+//uu break;
+//uu default:
+//uu vassert(0);
+//uu }
+//uu return p;
+//uu }
-/* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
- 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
- 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
- indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
- vvvv=1111 (unused 3rd reg). */
-static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
-{
- UChar L = 1; /* size = 256 */
- UChar pp = 0; /* no SIMD prefix */
- UChar mmmmm = 1; /* 0F */
- UChar notVvvv = 0; /* unused */
- UChar rexW = 0;
- UChar rexR = 0;
- UChar rexX = 0;
- UChar rexB = 0;
- /* Same logic as in rexAMode_M. */
- if (am->tag == Aam_IR) {
- rexR = iregBit3(greg);
- rexX = 0; /* not relevant */
- rexB = iregBit3(am->Aam.IR.reg);
- }
- else if (am->tag == Aam_IRRS) {
- rexR = iregBit3(greg);
- rexX = iregBit3(am->Aam.IRRS.index);
- rexB = iregBit3(am->Aam.IRRS.base);
- } else {
- vassert(0);
- }
- return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
-}
-static UChar* emitVexPrefix ( UChar* p, UInt vex )
-{
- switch (vex & 0xFF) {
- case 0xC5:
- *p++ = 0xC5;
- *p++ = (vex >> 8) & 0xFF;
- vassert(0 == (vex >> 16));
- break;
- case 0xC4:
- *p++ = 0xC4;
- *p++ = (vex >> 8) & 0xFF;
- *p++ = (vex >> 16) & 0xFF;
- vassert(0 == (vex >> 24));
- break;
- default:
- vassert(0);
- }
- return p;
-}
-
-
/* Emit ffree %st(N) */
static UChar* do_ffree_st ( UChar* p, Int n )
{
@@ -3432,14 +3415,14 @@
*p++ = (UChar)(i->Ain.SseShuf.order);
goto done;
- case Ain_AvxLdSt: {
- UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
- i->Ain.AvxLdSt.addr );
- p = emitVexPrefix(p, vex);
- *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
- p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
- goto done;
- }
+ //uu case Ain_AvxLdSt: {
+ //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
+ //uu i->Ain.AvxLdSt.addr );
+ //uu p = emitVexPrefix(p, vex);
+ //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
+ //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
+ //uu goto done;
+ //uu }
case Ain_EvCheck: {
/* We generate:
|