|
From: <sv...@va...> - 2005-11-07 14:23:58
|
Author: sewardj
Date: 2005-11-07 14:23:52 +0000 (Mon, 07 Nov 2005)
New Revision: 1444
Log:
Handle some SSE3 instructions. A curious side-effect of this is that
it makes it possible to run SSE3 code on an SSE2-only machine.
Modified:
trunk/priv/guest-x86/toIR.c
Modified: trunk/priv/guest-x86/toIR.c
===================================================================
--- trunk/priv/guest-x86/toIR.c 2005-11-05 15:46:22 UTC (rev 1443)
+++ trunk/priv/guest-x86/toIR.c 2005-11-07 14:23:52 UTC (rev 1444)
@@ -10376,9 +10376,95 @@
 
 
/* ---------------------------------------------------- */
- /* --- end of the SSE/SSE2 decoder. --- */
+ /* --- end of the SSE2 decoder. --- */
/* ---------------------------------------------------- */
 
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* Skip parts of the decoder which don't apply given the stated
+ guest subarchitecture. */
+ if (archinfo->subarch == VexSubArchX86_sse0
+ || archinfo->subarch == VexSubArchX86_sse1
+ /* || archinfo->subarch == VexSubArchX86_sse2 */)
+ goto after_sse_decoders;
+
+ insn = (UChar*)&guest_code[delta];
+
+ /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (2:2:0:0). */
+ /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (3:3:1:1). */
+ if (sz == 4 && insn[0] == 0xF3 && insn[1] == 0x0F
+ && (insn[2] == 0x12 || insn[2] == 0x16)) {
+ IRTemp s3, s2, s1, s0;
+ IRTemp sV = newTemp(Ity_V128);
+ Bool isH = insn[2] == 0x16;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRM(modrm)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+ putXMMReg( gregOfRM(modrm),
+ isH ? mk128from32s( s3, s3, s1, s1 )
+ : mk128from32s( s2, s2, s0, s0 ) );
+ goto decode_success;
+ }
+
+ /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
+ if (sz == 4 && insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xD0) {
+ IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
+ IRTemp eV = newTemp(Ity_V128);
+ IRTemp gV = newTemp(Ity_V128);
+ IRTemp addV = newTemp(Ity_V128);
+ IRTemp subV = newTemp(Ity_V128);
+ a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
+
+ modrm = insn[3];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRM(modrm)) );
+ DIP("addsubps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRM(modrm)) );
+
+ assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
+
+ breakup128to32s( addV, &a3, &a2, &a1, &a0 );
+ breakup128to32s( subV, &s3, &s2, &s1, &s0 );
+
+ putXMMReg( gregOfRM(modrm), mk128from32s( a3, s2, a1, s0 ));
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
after_sse_decoders:
 
/* Get the primary opcode. */
|