|
From: <sv...@va...> - 2012-11-20 15:24:36
|
sewardj 2012-11-20 15:24:24 +0000 (Tue, 20 Nov 2012)
New Revision: 2562
Log:
Add a special-case implementation of PCMPISTRI $0x3A, which generates
in-line IR instead of calling helpers. This is so that Memcheck can
do exact definedness propagation through it. This is important for
dealing with inlined PCMPISTRI-based strlen calls.
#309921, comment 6. (Patrick J. LoPresti , lop...@gm...)
Modified files:
trunk/priv/guest_amd64_toIR.c
Modified: trunk/priv/guest_amd64_toIR.c (+150 -0)
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2012-11-19 16:29:31 +00:00 (rev 2561)
+++ trunk/priv/guest_amd64_toIR.c 2012-11-20 15:24:24 +00:00 (rev 2562)
@@ -16875,7 +16875,152 @@
return delta;
}
+/* Returns Ity_I32: the 16 per-byte MSBs of V128 |exp|, zero-extended. */
+static IRExpr *IRExpr_getMSBs16x8(IRExpr *exp)
+{
+ IRTemp lo = newTemp(Ity_I64);
+ IRTemp hi = newTemp(Ity_I64);
+ assign(lo, unop(Iop_V128to64, exp));
+ assign(hi, unop(Iop_V128HIto64, exp));
+ return unop(Iop_16Uto32,
+ binop(Iop_8HLto16,
+ unop(Iop_GetMSBs8x8, mkexpr(hi)),
+ unop(Iop_GetMSBs8x8, mkexpr(lo))));
+}
+static IRExpr *IRExpr_ctz32(IRExpr *exp)
+{
+ /* Iop_Ctz32 appears to be broken, so zero-extend and use Iop_Ctz64. */
+ return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
+}
+
+/* For an I32 expression representing x, return !!x (0 or 1) as an I32 */
+static IRExpr* IRExpr_notnot(IRExpr *exp)
+{
+ /* Iop_ExpCmpNE32 appears broken, so widen and use Iop_ExpCmpNE64. */
+ return unop(Iop_1Uto32, binop(Iop_ExpCmpNE64, unop(Iop_32Uto64, exp),
+ mkU64(0)));
+}
+
+static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
+ Long delta, UChar opc, UChar imm,
+ HChar dis_buf[])
+{
+ /* Emits in-line IR. We only handle PCMPISTRI for now */
+ vassert((opc & 0x03) == 0x03);
+ /* And only an immediate byte of 0x38 or 0x3A */
+ vassert((imm & ~0x02) == 0x38);
+
+ /* FIXME: Is this correct when regNoL == 16 ? */
+ IRTemp argL = newTemp(Ity_V128);
+ assign(argL, getXMMReg(regNoL));
+ IRTemp argR = newTemp(Ity_V128);
+ assign(argR, getXMMReg(regNoR));
+
+ IRTemp zmaskL = newTemp(Ity_I32);
+ assign(zmaskL, IRExpr_getMSBs16x8(binop(Iop_CmpEQ8x16, mkexpr(argL),
+ mkV128(0))));
+ IRTemp zmaskR = newTemp(Ity_I32);
+ assign(zmaskR, IRExpr_getMSBs16x8(binop(Iop_CmpEQ8x16, mkexpr(argR),
+ mkV128(0))));
+
+ /* We want validL = ~(zmaskL | -zmaskL)
+
+ But this formulation kills memcheck's validity tracking when any
+ bits above the first "1" are invalid. So reformulate as:
+
+ validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
+ */
+
+ IRExpr *ctzL = unop(Iop_32to8, IRExpr_ctz32(mkexpr(zmaskL)));
+
+ /* Generate a 32-bit expression (0 or 1) which is zero iff zmaskL is
+ zero. Do this carefully so memcheck can propagate validity bits
+ correctly. NB: despite its name, it is 1 when zmaskL is nonzero.
+ */
+ IRTemp zmaskL_zero = newTemp(Ity_I32);
+ assign(zmaskL_zero, IRExpr_notnot(mkexpr(zmaskL)));
+
+ IRTemp validL = newTemp(Ity_I32);
+ assign(validL, binop(Iop_Sub32,
+ IRExpr_Mux0X(unop(Iop_32to8, mkexpr(zmaskL_zero)),
+ mkU32(0),
+ binop(Iop_Shl32, mkU32(1), ctzL)),
+ mkU32(1)));
+
+ /* And similarly for validR. */
+ IRExpr *ctzR = unop(Iop_32to8, IRExpr_ctz32(mkexpr(zmaskR)));
+ IRTemp zmaskR_zero = newTemp(Ity_I32);
+ assign(zmaskR_zero, IRExpr_notnot(mkexpr(zmaskR)));
+ IRTemp validR = newTemp(Ity_I32);
+ assign(validR, binop(Iop_Sub32,
+ IRExpr_Mux0X(unop(Iop_32to8, mkexpr(zmaskR_zero)),
+ mkU32(0),
+ binop(Iop_Shl32, mkU32(1), ctzR)),
+ mkU32(1)));
+
+ /* Do the actual comparison: one result bit per byte lane. */
+ IRExpr *boolResII = IRExpr_getMSBs16x8(binop(Iop_CmpEQ8x16,
+ mkexpr(argL),
+ mkexpr(argR)));
+
+ /* Compute boolResII & validL & validR (i.e., if both valid, use
+ comparison result) */
+ IRExpr *intRes1_a = binop(Iop_And32, boolResII,
+ binop(Iop_And32,
+ mkexpr(validL), mkexpr(validR)));
+
+ /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
+ IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
+ mkexpr(validL), mkexpr(validR)));
+ /* Otherwise, zero. Keep only the low 16 bits (one per byte lane). */
+ IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
+ binop(Iop_Or32, intRes1_a, intRes1_b));
+
+ /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
+ result. */
+ IRTemp intRes2 = newTemp(Ity_I32);
+ assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
+ binop(Iop_Xor32, intRes1, mkexpr(validL))));
+
+ /* If the 0x40 bit were set in imm=0x3A, we would return the index
+ of the msb. Since it is clear, we return the index of the
+ lsb. ORing in bit 16 makes the result 16 when intRes2 is zero. */
+ IRExpr *newECX = IRExpr_ctz32(binop(Iop_Or32,
+ mkexpr(intRes2), mkU32(0x10000)));
+
+ /* And that's our new ecx. */
+ putIReg32(R_RCX, newECX);
+
+ /* Now for the condition codes... */
+
+ /* C == 0 iff intRes2 == 0 */
+ IRExpr *c_bit = binop(Iop_Shl32, IRExpr_notnot(mkexpr(intRes2)),
+ mkU8(AMD64G_CC_SHIFT_C));
+ /* Z == 1 iff any byte in argL is 0 */
+ IRExpr *z_bit = binop(Iop_Shl32, mkexpr(zmaskL_zero),
+ mkU8(AMD64G_CC_SHIFT_Z));
+ /* S == 1 iff any byte in argR is 0 */
+ IRExpr *s_bit = binop(Iop_Shl32, mkexpr(zmaskR_zero),
+ mkU8(AMD64G_CC_SHIFT_S));
+ /* O == intRes2[0] */
+ IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
+ mkU32(0x01)),
+ mkU8(AMD64G_CC_SHIFT_O));
+
+ /* Put them all together and store them with the COPY flags thunk */
+ IRTemp cc = newTemp(Ity_I64);
+ assign(cc, widenUto64(binop(Iop_Or32,
+ binop(Iop_Or32, c_bit, z_bit),
+ binop(Iop_Or32, s_bit, o_bit))));
+ stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY)));
+ stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
+ stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
+ stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));
+
+ return delta;
+}
+
/* This can fail, in which case it returns the original (unchanged)
delta. */
static Long dis_PCMPxSTRx ( VexAbiInfo* vbi, Prefix pfx,
@@ -16913,6 +17058,11 @@
delta += alen+1;
}
+ if (imm == 0x3A && isISTRx && !isxSTRM) {
+ return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
+ opc, imm, dis_buf);
+ }
+
/* Now we know the XMM reg numbers for the operands, and the
immediate byte. Is it one we can actually handle? Throw out any
cases for which the helper function has not been verified. */
|