|
From: <sv...@va...> - 2005-07-06 18:49:22
|
Author: sewardj
Date: 2005-07-06 19:48:59 +0100 (Wed, 06 Jul 2005)
New Revision: 4115
Log:
Extensively re-analyse, re-check and revise the scheme for expensive
handling of integer EQ/NE, which can sometimes do better than the
naive scheme when the inputs are partially defined. I never was
convinced it was correct before, but now I am. Regtest to follow.
Modified:
trunk/memcheck/mc_translate.c
Modified: trunk/memcheck/mc_translate.c
===================================================================
--- trunk/memcheck/mc_translate.c 2005-07-06 13:19:11 UTC (rev 4114)
+++ trunk/memcheck/mc_translate.c 2005-07-06 18:48:59 UTC (rev 4115)
@@ -563,23 +563,44 @@
The result is:

PCastTo<1> (
- PCastTo<sz>( UifU<sz>(vxx, vyy) ) -- naive version
+ -- naive version
+ PCastTo<sz>( UifU<sz>(vxx, vyy) )
+
`DifD<sz>`
- PCastTo<sz>( CmpEQ<sz>( vec, 1....1 ) ) -- improvement term
+
+ -- improvement term
+ PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
)
+
where
vec contains 0 (defined) bits where the corresponding arg bits
- are defined but different, and 1 bits otherwise:
+ are defined but different, and 1 bits otherwise.

- vec = UifU<sz>( vxx, vyy, Not<sz>(Xor<sz>( xx, yy )) )
+ vec = Or<sz>( vxx, // 0 iff bit defined
+ vyy, // 0 iff bit defined
+ Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
+ )
+
+ If any bit of vec is 0, the result is defined and so the
+ improvement term should produce 0...0, else it should produce
+ 1...1.
+
+ Hence require for the improvement term:
+
+ if vec == 1...1 then 1...1 else 0...0
+ ->
+ PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
+
+ This was extensively re-analysed and checked on 6 July 05.
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv* mce,
IRType ty,
IRAtom* vxx, IRAtom* vyy,
IRAtom* xx, IRAtom* yy )
{
- IRAtom *naive, *vec, *vec_cmpd, *improved, *final_cast, *top;
- IROp opDIFD, opUIFU, opXOR, opNOT, opCMP;
+ IRAtom *naive, *vec, *improvement_term;
+ IRAtom *improved, *final_cast, *top;
+ IROp opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;

tl_assert(isShadowAtom(mce,vxx));
tl_assert(isShadowAtom(mce,vyy));
@@ -590,6 +611,7 @@

switch (ty) {
case Ity_I32:
+ opOR = Iop_Or32;
opDIFD = Iop_And32;
opUIFU = Iop_Or32;
opNOT = Iop_Not32;
@@ -598,6 +620,7 @@
top = mkU32(0xFFFFFFFF);
break;
case Ity_I64:
+ opOR = Iop_Or64;
opDIFD = Iop_And64;
opUIFU = Iop_Or64;
opNOT = Iop_Not64;
@@ -615,18 +638,18 @@
vec
= assignNew(
mce,ty,
- binop( opUIFU,
- assignNew(mce,ty, binop(opUIFU, vxx, vyy)),
+ binop( opOR,
+ assignNew(mce,ty, binop(opOR, vxx, vyy)),
assignNew(
mce,ty,
unop( opNOT,
assignNew(mce,ty, binop(opXOR, xx, yy))))));

- vec_cmpd
+ improvement_term
= mkPCastTo( mce,ty, assignNew(mce,Ity_I1, binop(opCMP, vec, top)));

improved
- = assignNew( mce,ty, binop(opDIFD, naive, vec_cmpd) );
+ = assignNew( mce,ty, binop(opDIFD, naive, improvement_term) );

final_cast
= mkPCastTo( mce, Ity_I1, improved );
@@ -1685,7 +1708,6 @@
case Iop_Mul32:
return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

- /* could do better: Add64, Sub64 */
case Iop_Add64:
if (mce->bogusLiterals)
return expensiveAddSub(mce,True,Ity_I64,
@@ -1713,6 +1735,7 @@
return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

case Iop_CmpEQ64:
+ case Iop_CmpNE64:
if (mce->bogusLiterals)
return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
else
@@ -1720,10 +1743,10 @@
cheap_cmp64:
case Iop_CmpLE64S: case Iop_CmpLE64U:
case Iop_CmpLT64U: case Iop_CmpLT64S:
- case Iop_CmpNE64:
return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));

case Iop_CmpEQ32:
+ case Iop_CmpNE32:
if (mce->bogusLiterals)
return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
else
@@ -1731,7 +1754,6 @@
cheap_cmp32:
case Iop_CmpLE32S: case Iop_CmpLE32U:
case Iop_CmpLT32U: case Iop_CmpLT32S:
- case Iop_CmpNE32:
return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

case Iop_CmpEQ16: case Iop_CmpNE16:
|