From: <sv...@va...> - 2015-01-27 23:17:09
Author: sewardj
Date: Tue Jan 27 23:17:02 2015
New Revision: 3075
Log:
AMD64 front end: translate AVX2 PMASKMOV load instructions (vector
conditional loads) using IR conditional load statements (IRLoadG)
instead of the previous rather ingenious hack.
AMD64 back end:
* Add instruction selection etc. for 32- and 64-bit conditional loads (IRLoadG).
* Handle conditional dirty helper calls that return a value; these
result from Memcheck's instrumentation of IRLoadGs.
No functional change. This is a cleanup as part of supporting AVX2
PMASKMOV loads and stores by using the existing IR facilities for
conditional loads and stores.
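
For reference, the denotation of a guarded load statement
IRStmt_LoadG(end, cvt, dst, addr, alt, guard), per its description in
libvex_ir.h, is roughly the following (a sketch, not VEX source; cvt is
one of the ILGop_* conversions):

   /* dst receives the converted loaded value if the guard holds,
      and the supplied alternative value otherwise. */
   if (guard)
      dst = cvt( LOAD:end(addr) );
   else
      dst = alt;
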
Modified:
trunk/priv/guest_amd64_toIR.c
trunk/priv/host_amd64_defs.c
trunk/priv/host_amd64_defs.h
trunk/priv/host_amd64_isel.c
Modified: trunk/priv/guest_amd64_toIR.c
==============================================================================
--- trunk/priv/guest_amd64_toIR.c (original)
+++ trunk/priv/guest_amd64_toIR.c Tue Jan 27 23:17:02 2015
@@ -27255,6 +27255,9 @@
}
delta += alen;
+ for (i = 0; i < sizeof(res)/sizeof(res[0]); i++)
+ res[i] = IRTemp_INVALID;
+
for (i = 0; i < 2 * (isYMM ? 2 : 1) * (ty == Ity_I32 ? 2 : 1); i++) {
res[i] = newTemp(ty);
cond = newTemp(Ity_I1);
@@ -27263,19 +27266,15 @@
ty == Ity_I32 ? getYMMRegLane32( rV, i )
: getYMMRegLane64( rV, i ),
mkU(ty, 0) ));
- assign( res[i],
- IRExpr_ITE(
- mkexpr(cond),
- loadLE(ty, IRExpr_ITE(
- mkexpr(cond),
- binop(Iop_Add64, mkexpr(addr),
- mkU64(i*(ty == Ity_I32 ? 4 : 8))),
- getIReg64(R_RSP)
- )
- ),
- mkU(ty, 0)
- )
- );
+ stmt(
+ IRStmt_LoadG(
+ Iend_LE,
+ ty == Ity_I32 ? ILGop_Ident32 : ILGop_Ident64,
+ res[i],
+ binop(Iop_Add64, mkexpr(addr), mkU64(i * (ty == Ity_I32 ? 4 : 8))),
+ ty == Ity_I32 ? mkU32(0) : mkU64(0),
+ mkexpr(cond)
+ ));
}
switch (ty) {
case Ity_I32:
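
To make the old-vs-new difference concrete for a single 32-bit lane i:
the old translation kept the load unconditional but steered its address
to the always-mapped stack pointer whenever the guard was false,
roughly

   res[i] = ITE(cond, LDle:I32(ITE(cond, addr + 4*i, %rsp)), 0:I32)

whereas the new translation states the conditionality directly as a
guarded load:

   t_res[i] = if (cond) LDle:I32(addr + 4*i) else 0:I32
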
Modified: trunk/priv/host_amd64_defs.c
==============================================================================
--- trunk/priv/host_amd64_defs.c (original)
+++ trunk/priv/host_amd64_defs.c Tue Jan 27 23:17:02 2015
@@ -745,6 +745,17 @@
vassert(cond != Acc_ALWAYS);
return i;
}
+AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
+ AMD64AMode* addr, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_CLoad;
+ i->Ain.CLoad.cond = cond;
+ i->Ain.CLoad.szB = szB;
+ i->Ain.CLoad.addr = addr;
+ i->Ain.CLoad.dst = dst;
+ vassert(cond != Acc_ALWAYS);
+ return i;
+}
AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_MovxLQ;
@@ -1121,6 +1132,16 @@
vex_printf(",");
ppHRegAMD64(i->Ain.CMov64.dst);
return;
+ case Ain_CLoad:
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.CLoad.cond));
+ vex_printf("mov%c (", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
+ ppAMD64AMode(i->Ain.CLoad.addr);
+ vex_printf("), ");
+ (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+ (i->Ain.CLoad.dst);
+ vex_printf(" }");
+ return;
case Ain_MovxLQ:
vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
@@ -1463,6 +1484,10 @@
addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
return;
+ case Ain_CLoad:
+ addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
+ addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
+ return;
case Ain_MovxLQ:
addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
@@ -1695,6 +1720,10 @@
mapRegs_AMD64RM(m, i->Ain.CMov64.src);
mapReg(m, &i->Ain.CMov64.dst);
return;
+ case Ain_CLoad:
+ mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
+ mapReg(m, &i->Ain.CLoad.dst);
+ return;
case Ain_MovxLQ:
mapReg(m, &i->Ain.MovxLQ.src);
mapReg(m, &i->Ain.MovxLQ.dst);
@@ -2671,43 +2700,113 @@
}
case Ain_Call: {
- if (i->Ain.Call.cond != Acc_ALWAYS
- && i->Ain.Call.rloc.pri != RLPri_None) {
- /* The call might not happen (it isn't unconditional) and it
- returns a result. In this case we will need to generate a
- control flow diamond to put 0x555..555 in the return
- register(s) in the case where the call doesn't happen. If
- this ever becomes necessary, maybe copy code from the ARM
- equivalent. Until that day, just give up. */
- goto bad;
- }
- /* As per detailed comment for Ain_Call in
- getRegUsage_AMD64Instr above, %r11 is used as an address
- temporary. */
- /* jump over the following two insns if the condition does not
- hold */
- Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
- if (i->Ain.Call.cond != Acc_ALWAYS) {
- *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
- *p++ = shortImm ? 10 : 13;
- /* 10 or 13 bytes in the next two insns */
- }
- if (shortImm) {
- /* 7 bytes: movl sign-extend(imm32), %r11 */
- *p++ = 0x49;
- *p++ = 0xC7;
- *p++ = 0xC3;
- p = emit32(p, (UInt)i->Ain.Call.target);
+ /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
+ above, %r11 is used as an address temporary. */
+ /* If we don't need to do any fixup actions in the case that the
+ call doesn't happen, just do the simple thing and emit
+ straight-line code. This is usually the case. */
+ if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
+ || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
+ /* jump over the following two insns if the condition does
+ not hold */
+ Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
+ if (i->Ain.Call.cond != Acc_ALWAYS) {
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
+ *p++ = shortImm ? 10 : 13;
+ /* 10 or 13 bytes in the next two insns */
+ }
+ if (shortImm) {
+ /* 7 bytes: movl sign-extend(imm32), %r11 */
+ *p++ = 0x49;
+ *p++ = 0xC7;
+ *p++ = 0xC3;
+ p = emit32(p, (UInt)i->Ain.Call.target);
+ } else {
+ /* 10 bytes: movabsq $target, %r11 */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ p = emit64(p, i->Ain.Call.target);
+ }
+ /* 3 bytes: call *%r11 */
+ *p++ = 0x41;
+ *p++ = 0xFF;
+ *p++ = 0xD3;
} else {
- /* 10 bytes: movabsq $target, %r11 */
+ Int delta;
+ /* Complex case. We have to generate an if-then-else diamond. */
+ // before:
+ // j{!cond} else:
+ // movabsq $target, %r11
+ // call* %r11
+ // preElse:
+ // jmp after:
+ // else:
+ // movabsq $0x5555555555555555, %rax // possibly
+ // movq %rax, %rdx // possibly
+ // after:
+
+ // before:
+ UChar* pBefore = p;
+
+ // j{!cond} else:
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+ // movabsq $target, %r11
*p++ = 0x49;
*p++ = 0xBB;
p = emit64(p, i->Ain.Call.target);
+
+ // call* %r11
+ *p++ = 0x41;
+ *p++ = 0xFF;
+ *p++ = 0xD3;
+
+ // preElse:
+ UChar* pPreElse = p;
+
+ // jmp after:
+ *p++ = 0xEB;
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+ // else:
+ UChar* pElse = p;
+
+ /* Do the 'else' actions */
+ switch (i->Ain.Call.rloc.pri) {
+ case RLPri_Int:
+ // movabsq $0x5555555555555555, %rax
+ *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
+ break;
+ case RLPri_2Int:
+ vassert(0); //ATC
+ // movabsq $0x5555555555555555, %rax
+ *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
+ // movq %rax, %rdx
+ *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
+ break;
+ case RLPri_None: case RLPri_INVALID: default:
+ vassert(0);
+ }
+
+ // after:
+ UChar* pAfter = p;
+
+ // Fix up the branch offsets. The +2s in the offset
+ // calculations are there because x86 requires conditional
+ // branches to have their offset stated relative to the
+ // instruction immediately following the branch insn. And in
+ // both cases the branch insns are 2 bytes long.
+
+ // First, the "j{!cond} else:" at pBefore.
+ delta = (Int)(Long)(pElse - (pBefore + 2));
+ vassert(delta >= 0 && delta < 100/*arbitrary*/);
+ *(pBefore+1) = (UChar)delta;
+
+ // And secondly, the "jmp after:" at pPreElse.
+ delta = (Int)(Long)(pAfter - (pPreElse + 2));
+ vassert(delta >= 0 && delta < 100/*arbitrary*/);
+ *(pPreElse+1) = (UChar)delta;
}
- /* 3 bytes: call *%r11 */
- *p++ = 0x41;
- *p++ = 0xFF;
- *p++ = 0xD3;
goto done;
}
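
As a sanity check on the two fixups: in the RLPri_Int case all the
distances are fixed, so the deltas can be computed by hand.  The
"j{!cond} else:" at pBefore skips the movabsq (10 bytes), the call
(3 bytes) and the "jmp after:" (2 bytes), giving delta = 15; the
"jmp after:" at pPreElse skips just the single movabsq, giving
delta = 10.  Both are comfortably inside the < 100 sanity bound
asserted above.
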
@@ -2917,6 +3016,35 @@
}
break;
+ case Ain_CLoad: {
+ vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
+
+ /* Only 32- or 64-bit variants are allowed. */
+ vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+ /* Now the load. Either a normal 64 bit load or a normal 32 bit
+ load, which, by the default zero-extension rule, zeroes out
+ the upper half of the destination, as required. */
+ rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
+ *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
+ *p++ = 0x8B;
+ p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
+
+ /* Fix up the conditional branch */
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ goto done;
+ }
+
case Ain_MovxLQ:
/* No, _don't_ ask me why the sense of the args has to be
different in the S vs Z case. I don't know. */
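
The net effect of the Ain_CLoad case is a two-instruction skip, in the
style of the sketches above (the emitted jcc uses the inverted
condition, cond ^ 1):

   j{!cond} skip:
   mov{l,q} (addr), dst      /* the 32-bit form zero-extends */
   skip:
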
Modified: trunk/priv/host_amd64_defs.h
==============================================================================
--- trunk/priv/host_amd64_defs.h (original)
+++ trunk/priv/host_amd64_defs.h Tue Jan 27 23:17:02 2015
@@ -368,6 +368,7 @@
Ain_XIndir, /* indirect transfer to GA */
Ain_XAssisted, /* assisted transfer to GA */
Ain_CMov64, /* conditional move */
+ Ain_CLoad, /* cond. load to int reg, 32 bit ZX or 64 bit only */
Ain_MovxLQ, /* reg-reg move, zx-ing/sx-ing top half */
Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
Ain_Store, /* store 32/16/8 bit value in memory */
@@ -505,6 +506,14 @@
AMD64RM* src;
HReg dst;
} CMov64;
+ /* conditional load to int reg, 32 bit ZX or 64 bit only.
+ cond may not be Acc_ALWAYS. */
+ struct {
+ AMD64CondCode cond;
+ UChar szB; /* 4 or 8 only */
+ AMD64AMode* addr;
+ HReg dst;
+ } CLoad;
/* reg-reg move, sx-ing/zx-ing top half */
struct {
Bool syned;
@@ -710,6 +719,8 @@
extern AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
AMD64CondCode cond, IRJumpKind jk );
extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst );
+extern AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
+ AMD64AMode* addr, HReg dst );
extern AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
AMD64AMode* src, HReg dst );
Modified: trunk/priv/host_amd64_isel.c
==============================================================================
--- trunk/priv/host_amd64_isel.c (original)
+++ trunk/priv/host_amd64_isel.c Tue Jan 27 23:17:02 2015
@@ -4288,6 +4288,32 @@
switch (stmt->tag) {
+ /* --------- LOADG (guarded load) --------- */
+ case Ist_LoadG: {
+ IRLoadG* lg = stmt->Ist.LoadG.details;
+ if (lg->end != Iend_LE)
+ goto stmt_fail;
+
+ UChar szB = 0; /* invalid */
+ switch (lg->cvt) {
+ case ILGop_Ident32: szB = 4; break;
+ case ILGop_Ident64: szB = 8; break;
+ default: break;
+ }
+ if (szB == 0)
+ goto stmt_fail;
+
+ AMD64AMode* amAddr = iselIntExpr_AMode(env, lg->addr);
+ HReg rAlt = iselIntExpr_R(env, lg->alt);
+ HReg rDst = lookupIRTemp(env, lg->dst);
+ /* Get the alt value into the dst. We'll do a conditional load
+ which overwrites it -- or not -- with loaded data. */
+ addInstr(env, mk_iMOVsd_RR(rAlt, rDst));
+ AMD64CondCode cc = iselCondCode(env, lg->guard);
+ addInstr(env, AMD64Instr_CLoad(cc, szB, amAddr, rDst));
+ return;
+ }
+
/* --------- STORE --------- */
case Ist_Store: {
IRType tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
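
Putting the isel pieces together, the code selected for a guarded
integer load is, in sketch form:

   movq rAlt, rDst           /* rDst starts out holding 'alt' */
   ...                       /* code computing the guard into %rflags,
                                emitted by iselCondCode */
   j{!guard} skip:           /* these two insns are the Ain_CLoad */
   mov{l,q} (amAddr), rDst   /* conditionally overwrite with data */
   skip: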