|
From: <sv...@va...> - 2005-11-15 11:16:34
|
Author: sewardj
Date: 2005-11-15 11:16:30 +0000 (Tue, 15 Nov 2005)
New Revision: 1460
Log:
Implement SSE2 'clflush'.
Modified:
trunk/priv/guest-x86/ghelpers.c
trunk/priv/guest-x86/toIR.c
Modified: trunk/priv/guest-x86/ghelpers.c
===================================================================
--- trunk/priv/guest-x86/ghelpers.c 2005-11-15 10:21:19 UTC (rev 1459)
+++ trunk/priv/guest-x86/ghelpers.c 2005-11-15 11:16:30 UTC (rev 1460)
@@ -2222,8 +2222,7 @@

vex_state->guest_EMWARN = EmWarn_NONE;

- /* These should not ever be either read or written, but we
- initialise them anyway. */
+ /* SSE2 has a 'clflush' cache-line-invalidator which uses these. */
vex_state->guest_TISTART = 0;
vex_state->guest_TILEN = 0;
}
Modified: trunk/priv/guest-x86/toIR.c
===================================================================
--- trunk/priv/guest-x86/toIR.c 2005-11-15 10:21:19 UTC (rev 1459)
+++ trunk/priv/guest-x86/toIR.c 2005-11-15 11:16:30 UTC (rev 1460)
@@ -225,6 +225,8 @@

#define OFFB_EMWARN offsetof(VexGuestX86State,guest_EMWARN)

+#define OFFB_TISTART offsetof(VexGuestX86State,guest_TISTART)
+#define OFFB_TILEN offsetof(VexGuestX86State,guest_TILEN)

/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the ---*/
@@ -10339,7 +10341,6 @@
goto decode_success;
}

-
//-- /* FXSAVE/FXRSTOR m32 -- load/store the FPU/MMX/SSE state. */
//-- if (insn[0] == 0x0F && insn[1] == 0xAE
//-- && (!epartIsReg(insn[2]))
@@ -10356,25 +10357,38 @@
//-- DIP("fx%s %s\n", store ? "save" : "rstor", dis_buf );
//-- goto decode_success;
//-- }
-//--
-//-- /* CLFLUSH -- flush cache line */
-//-- if (insn[0] == 0x0F && insn[1] == 0xAE
-//-- && (!epartIsReg(insn[2]))
-//-- && (gregOfRM(insn[2]) == 7))
-//-- {
-//-- vg_assert(sz == 4);
-//-- pair =3D disAMode ( cb, sorb, eip+2, dis_buf );
-//-- t1 =3D LOW24(pair);
-//-- eip += 2+HI8(pair);
-//-- uInstr3(cb, SSE2a_MemRd, 0, /* ignore sz for internal ops */
-//-- Lit16, (((UShort)0x0F) << 8) | (UShort)0xAE,
-//-- Lit16, (UShort)insn[2],
-//-- TempReg, t1 );
-//-- DIP("clflush %s\n", dis_buf);
-//-- goto decode_success;
-//-- }

+ /* 0F AE /7 = CLFLUSH -- flush cache line */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE
+ && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {

+ /* This is something of a hack. We need to know the size of the
+ cache line containing addr. Since we don't (easily), assume
+ 256 on the basis that no real cache would have a line that
+ big. It's safe to invalidate more stuff than we need, just
+ inefficient. */
+ UInt lineszB = 256;
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+
+ /* Round addr down to the start of the containing block. */
+ stmt( IRStmt_Put(
+ OFFB_TISTART,
+ binop( Iop_And32,
+ mkexpr(addr),
+ mkU32( ~(lineszB-1) ))) );
+
+ stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) );
+
+ irbb->jumpkind = Ijk_TInval;
+ irbb->next = mkU32(guest_EIP_bbstart+delta);
+ dres.whatNext = Dis_StopHere;
+
+ DIP("clflush %s\n", dis_buf);
+ goto decode_success;
+ }
+
/* ---------------------------------------------------- */
/* --- end of the SSE2 decoder. --- */
/* ---------------------------------------------------- */
|