|
From: <sv...@va...> - 2012-06-25 07:46:31
|
sewardj 2012-06-25 08:46:18 +0100 (Mon, 25 Jun 2012)
New Revision: 2412
Log:
Add IR ops Iop_CmpNEZ32x8 and Iop_CmpNEZ64x4, needed for Memcheck
instrumentation of 256-bit vector arithmetic.
Modified files:
trunk/priv/host_amd64_isel.c
trunk/priv/ir_defs.c
trunk/pub/libvex_ir.h
Modified: trunk/pub/libvex_ir.h (+3 -0)
===================================================================
--- trunk/pub/libvex_ir.h 2012-06-25 08:40:54 +01:00 (rev 2411)
+++ trunk/pub/libvex_ir.h 2012-06-25 08:46:18 +01:00 (rev 2412)
@@ -1441,6 +1441,9 @@
Iop_XorV256,
Iop_NotV256,
+ /* MISC (vector integer cmp != 0) */
+ Iop_CmpNEZ32x8, Iop_CmpNEZ64x4,
+
/* ------------------ 256-bit SIMD FP. ------------------ */
Iop_Add64Fx4,
Iop_Sub64Fx4,
Modified: trunk/priv/ir_defs.c (+3 -0)
===================================================================
--- trunk/priv/ir_defs.c 2012-06-25 08:40:54 +01:00 (rev 2411)
+++ trunk/priv/ir_defs.c 2012-06-25 08:46:18 +01:00 (rev 2412)
@@ -1008,6 +1008,8 @@
case Iop_OrV256: vex_printf("OrV256"); return;
case Iop_XorV256: vex_printf("XorV256"); return;
case Iop_NotV256: vex_printf("NotV256"); return;
+ case Iop_CmpNEZ64x4: vex_printf("CmpNEZ64x4"); return;
+ case Iop_CmpNEZ32x8: vex_printf("CmpNEZ32x8"); return;
default: vpanic("ppIROp(1)");
}
@@ -2828,6 +2830,7 @@
case Iop_Sqrt32Fx8:
case Iop_Sqrt64Fx4:
case Iop_Recip32Fx8:
+ case Iop_CmpNEZ64x4: case Iop_CmpNEZ32x8:
UNARY(Ity_V256, Ity_V256);
default:
Modified: trunk/priv/host_amd64_isel.c (+45 -0)
===================================================================
--- trunk/priv/host_amd64_isel.c 2012-06-25 08:40:54 +01:00 (rev 2411)
+++ trunk/priv/host_amd64_isel.c 2012-06-25 08:46:18 +01:00 (rev 2412)
@@ -3474,6 +3474,51 @@
return;
}
+ case Iop_CmpNEZ64x4: {
+ /* We can use SSE2 instructions for this. */
+ /* Same scheme as Iop_CmpNEZ64x2, except twice as wide
+ (obviously). See comment on Iop_CmpNEZ64x2 for
+ explanation of what's going on here. */
+ HReg argHi, argLo;
+ iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
+ HReg tmpHi = generate_zeroes_V128(env);
+ HReg tmpLo = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo));
+ HReg dstHi = newVRegV(env);
+ HReg dstLo = newVRegV(env);
+ addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi));
+ addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo));
+ tmpHi = do_sse_NotV128(env, tmpHi);
+ tmpLo = do_sse_NotV128(env, tmpLo);
+ addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi));
+ addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo));
+ addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi));
+ addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo));
+ *rHi = dstHi;
+ *rLo = dstLo;
+ return;
+ }
+
+ case Iop_CmpNEZ32x8: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
+ do_CmpNEZ_vector:
+ {
+ HReg argHi, argLo;
+ iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
+ HReg tmpHi = newVRegV(env);
+ HReg tmpLo = newVRegV(env);
+ HReg zero = generate_zeroes_V128(env);
+ HReg dstHi, dstLo;
+ addInstr(env, mk_vMOVsd_RR(argHi, tmpHi));
+ addInstr(env, mk_vMOVsd_RR(argLo, tmpLo));
+ addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi));
+ addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo));
+ dstHi = do_sse_NotV128(env, tmpHi);
+ dstLo = do_sse_NotV128(env, tmpLo);
+ *rHi = dstHi;
+ *rLo = dstLo;
+ return;
+ }
+
default:
break;
} /* switch (e->Iex.Unop.op) */
|