|
From: <sv...@va...> - 2005-04-06 01:11:14
|
Author: sewardj
Date: 2005-04-06 02:11:08 +0100 (Wed, 06 Apr 2005)
New Revision: 1121
Modified:
trunk/priv/guest-x86/toIR.c
Log:
When generating IR for movsd mem->reg, don't first write the entire
guest reg with zeroes and then overwrite the lower half. This forces
the back end to generate code which creates huge write-after-write
stalls in the memory system of P4s due to the different sized writes.
This apparently small change reduces the run-time of one
sse2-intensive floating point program from 145 seconds to 90 seconds
(--tool=none).
Modified: trunk/priv/guest-x86/toIR.c
===================================================================
--- trunk/priv/guest-x86/toIR.c 2005-04-06 00:47:01 UTC (rev 1120)
+++ trunk/priv/guest-x86/toIR.c 2005-04-06 01:11:08 UTC (rev 1121)
@@ -9278,7 +9278,7 @@
delta += 3+1;
} else {
addr = disAMode ( &alen, sorb, delta+3, dis_buf );
- putXMMReg( gregOfRM(modrm), mkV128(0) );
+ putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
putXMMRegLane64( gregOfRM(modrm), 0,
loadLE(Ity_I64, mkexpr(addr)) );
DIP("movsd %s,%s\n", dis_buf,
|