|
From: <sv...@va...> - 2010-01-11 10:46:28
|
Author: sewardj
Date: 2010-01-11 10:46:18 +0000 (Mon, 11 Jan 2010)
New Revision: 1955
Log:
For 32-bit reads of integer guest registers, generate a 64-bit Get
followed by an Iop_64to32 narrowing, rather than doing a 32-bit Get.
This makes the Put-to-Get-forwarding optimisation work seamlessly for
code which does 32-bit register operations (very common), which it
never did before. Also add a folding rule to remove the resulting
32-to-64-to-32 widen-narrow chains.
This reduces the amount of code generated overall by about 3%, but gives
a much larger speedup, of about 11% for Memcheck running perf/bz2.c.
Not sure why this is; perhaps it is due to reduced store bandwidth
requirements in the generated code, or to avoiding
store-forwarding stalls when writing/reading the guest state.
Modified:
trunk/priv/guest_amd64_toIR.c
trunk/priv/ir_opt.c
Modified: trunk/priv/guest_amd64_toIR.c
===================================================================
--- trunk/priv/guest_amd64_toIR.c 2010-01-09 11:43:21 UTC (rev 1954)
+++ trunk/priv/guest_amd64_toIR.c 2010-01-11 10:46:18 UTC (rev 1955)
@@ -972,7 +972,7 @@
switch (sz) {
case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
- case 4: return IRExpr_Get( OFFB_RAX, Ity_I32 );
+ case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
default: vpanic("getIRegRAX(amd64)");
}
@@ -1020,7 +1020,7 @@
switch (sz) {
case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
- case 4: return IRExpr_Get( OFFB_RDX, Ity_I32 );
+ case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
default: vpanic("getIRegRDX(amd64)");
}
@@ -1071,8 +1071,9 @@
static IRExpr* getIReg32 ( UInt regno )
{
vassert(!host_is_bigendian);
- return IRExpr_Get( integerGuestReg64Offset(regno),
- Ity_I32 );
+ return unop(Iop_64to32,
+ IRExpr_Get( integerGuestReg64Offset(regno),
+ Ity_I64 ));
}
static void putIReg32 ( UInt regno, IRExpr* e )
@@ -1136,11 +1137,22 @@
vassert(lo3bits < 8);
vassert(IS_VALID_PFX(pfx));
vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
- return IRExpr_Get(
- offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
- toBool(sz==1 && !haveREX(pfx)) ),
- szToITy(sz)
- );
+ if (sz == 4) {
+ sz = 8;
+ return unop(Iop_64to32,
+ IRExpr_Get(
+ offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
+ toBool(sz==1 && !haveREX(pfx)) ),
+ szToITy(sz)
+ )
+ );
+ } else {
+ return IRExpr_Get(
+ offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
+ toBool(sz==1 && !haveREX(pfx)) ),
+ szToITy(sz)
+ );
+ }
}
static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
@@ -1206,8 +1218,15 @@
static
IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
- return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
- szToITy(sz) );
+ if (sz == 4) {
+ sz = 8;
+ return unop(Iop_64to32,
+ IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
+ szToITy(sz) ));
+ } else {
+ return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
+ szToITy(sz) );
+ }
}
static
@@ -1246,8 +1265,15 @@
static
IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
{
- return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
- szToITy(sz) );
+ if (sz == 4) {
+ sz = 8;
+ return unop(Iop_64to32,
+ IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
+ szToITy(sz) ));
+ } else {
+ return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
+ szToITy(sz) );
+ }
}
static
Modified: trunk/priv/ir_opt.c
===================================================================
--- trunk/priv/ir_opt.c 2010-01-09 11:43:21 UTC (rev 1954)
+++ trunk/priv/ir_opt.c 2010-01-11 10:46:18 UTC (rev 1955)
@@ -3970,6 +3970,11 @@
if (is_Unop(aa, Iop_CmpwNEZ64))
return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
break;
+ case Iop_64to32:
+ /* 64to32( 32Uto64 ( x )) --> x */
+ if (is_Unop(aa, Iop_32Uto64))
+ return aa->Iex.Unop.arg;
+ break;
case Iop_1Sto32:
/* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
@@ -3984,6 +3989,7 @@
}
break;
+
default:
break;
}
|