|
From: <sv...@va...> - 2006-12-27 04:21:14
|
Author: sewardj
Date: 2006-12-27 04:21:05 +0000 (Wed, 27 Dec 2006)
New Revision: 1703
Log:
Merge r1702 (x86 front end: Implement MASKMOVQ and MASKMOVDQU)
Modified:
branches/VEX_3_2_BRANCH/priv/guest-x86/toIR.c
branches/VEX_3_2_BRANCH/priv/host-generic/h_generic_simd64.c
branches/VEX_3_2_BRANCH/priv/host-generic/h_generic_simd64.h
branches/VEX_3_2_BRANCH/priv/host-x86/isel.c
branches/VEX_3_2_BRANCH/priv/ir/irdefs.c
branches/VEX_3_2_BRANCH/pub/libvex_ir.h
Modified: branches/VEX_3_2_BRANCH/priv/guest-x86/toIR.c
===================================================================
--- branches/VEX_3_2_BRANCH/priv/guest-x86/toIR.c 2006-12-27 01:15:29 UTC (rev 1702)
+++ branches/VEX_3_2_BRANCH/priv/guest-x86/toIR.c 2006-12-27 04:21:05 UTC (rev 1703)
@@ -5679,6 +5679,38 @@
break;
}
 
+ case 0xF7: {
+ IRTemp addr = newTemp(Ity_I32);
+ IRTemp regD = newTemp(Ity_I64);
+ IRTemp regM = newTemp(Ity_I64);
+ IRTemp mask = newTemp(Ity_I64);
+ IRTemp olddata = newTemp(Ity_I64);
+ IRTemp newdata = newTemp(Ity_I64);
+
+ modrm = getIByte(delta);
+ if (sz != 4 || (!epartIsReg(modrm)))
+ goto mmx_decode_failure;
+ delta++;
+
+ assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
+ assign( regM, getMMXReg( eregOfRM(modrm) ));
+ assign( regD, getMMXReg( gregOfRM(modrm) ));
+ assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
+ assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
+ assign( newdata,
+ binop(Iop_Or64,
+ binop(Iop_And64,
+ mkexpr(regD),
+ mkexpr(mask) ),
+ binop(Iop_And64,
+ mkexpr(olddata),
+ unop(Iop_Not64, mkexpr(mask)))) );
+ storeLE( mkexpr(addr), mkexpr(newdata) );
+ DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
+ nameMMXReg( gregOfRM(modrm) ) );
+ break;
+ }
+
/* --- MMX decode failure --- */
default:
mmx_decode_failure:
@@ -7670,6 +7702,16 @@
goto decode_success;
}
 
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F F7 = MASKMOVQ -- 8x8 masked store */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
+ Bool ok = False;
+ delta = dis_MMX( &ok, sorb, sz, delta+1 );
+ if (!ok)
+ goto decode_failure;
+ goto decode_success;
+ }
+
/* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
@@ -9314,6 +9356,50 @@
/* else fall through */
}
 
+ /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
+ if (insn[0] == 0x0F && insn[1] == 0xF7) {
+ modrm = getIByte(delta+2);
+ if (sz == 2 && epartIsReg(modrm)) {
+ IRTemp regD = newTemp(Ity_V128);
+ IRTemp mask = newTemp(Ity_V128);
+ IRTemp olddata = newTemp(Ity_V128);
+ IRTemp newdata = newTemp(Ity_V128);
+ addr = newTemp(Ity_I32);
+
+ assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) ));
+ assign( regD, getXMMReg( gregOfRM(modrm) ));
+
+ /* Unfortunately can't do the obvious thing with SarN8x16
+ here since that can't be re-emitted as SSE2 code - no such
+ insn. */
+ assign(
+ mask,
+ binop(Iop_64HLtoV128,
+ binop(Iop_SarN8x8,
+ getXMMRegLane64( eregOfRM(modrm), 1 ),
+ mkU8(7) ),
+ binop(Iop_SarN8x8,
+ getXMMRegLane64( eregOfRM(modrm), 0 ),
+ mkU8(7) ) ));
+ assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
+ assign( newdata,
+ binop(Iop_OrV128,
+ binop(Iop_AndV128,
+ mkexpr(regD),
+ mkexpr(mask) ),
+ binop(Iop_AndV128,
+ mkexpr(olddata),
+ unop(Iop_NotV128, mkexpr(mask)))) );
+ storeLE( mkexpr(addr), mkexpr(newdata) );
+
+ delta += 2+1;
+ DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
+ nameXMMReg( gregOfRM(modrm) ) );
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
/* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
if (insn[0] == 0x0F && insn[1] == 0xE7) {
modrm = getIByte(delta+2);
Modified: branches/VEX_3_2_BRANCH/priv/host-generic/h_generic_simd64.c
===================================================================
--- branches/VEX_3_2_BRANCH/priv/host-generic/h_generic_simd64.c 2006-12-27 01:15:29 UTC (rev 1702)
+++ branches/VEX_3_2_BRANCH/priv/host-generic/h_generic_simd64.c 2006-12-27 04:21:05 UTC (rev 1703)
@@ -299,6 +299,11 @@
/* shifts: we don't care about out-of-range ones, since
that is dealt with at a higher level. */
 
+static inline UChar sar8 ( UChar v, UInt n )
+{
+ return toUChar(((Char)v) >> n);
+}
+
static inline UShort shl16 ( UShort v, UInt n )
{
return toUShort(v << n);
@@ -868,6 +873,22 @@
);
}
 
+ULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 8); */
+ nn &= 7;
+ return mk8x8(
+ sar8( sel8x8_7(xx), nn ),
+ sar8( sel8x8_6(xx), nn ),
+ sar8( sel8x8_5(xx), nn ),
+ sar8( sel8x8_4(xx), nn ),
+ sar8( sel8x8_3(xx), nn ),
+ sar8( sel8x8_2(xx), nn ),
+ sar8( sel8x8_1(xx), nn ),
+ sar8( sel8x8_0(xx), nn )
+ );
+}
+
/* ------------ Averaging ------------ */
 
ULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy )
Modified: branches/VEX_3_2_BRANCH/priv/host-generic/h_generic_simd64.h
===================================================================
--- branches/VEX_3_2_BRANCH/priv/host-generic/h_generic_simd64.h 2006-12-27 01:15:29 UTC (rev 1702)
+++ branches/VEX_3_2_BRANCH/priv/host-generic/h_generic_simd64.h 2006-12-27 04:21:05 UTC (rev 1703)
@@ -114,6 +114,7 @@
extern ULong h_generic_calc_ShrN16x4 ( ULong, UInt );
extern ULong h_generic_calc_ShrN32x2 ( ULong, UInt );
 
+extern ULong h_generic_calc_SarN8x8 ( ULong, UInt );
extern ULong h_generic_calc_SarN16x4 ( ULong, UInt );
extern ULong h_generic_calc_SarN32x2 ( ULong, UInt );
 
Modified: branches/VEX_3_2_BRANCH/priv/host-x86/isel.c
===================================================================
--- branches/VEX_3_2_BRANCH/priv/host-x86/isel.c 2006-12-27 01:15:29 UTC (rev 1702)
+++ branches/VEX_3_2_BRANCH/priv/host-x86/isel.c 2006-12-27 04:21:05 UTC (rev 1703)
@@ -2223,6 +2223,8 @@
fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
case Iop_SarN16x4:
fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
+ case Iop_SarN8x8:
+ fn = (HWord)h_generic_calc_SarN8x8; goto shifty;
shifty: {
/* Note: the following assumes all helpers are of
signature
Modified: branches/VEX_3_2_BRANCH/priv/ir/irdefs.c
===================================================================
--- branches/VEX_3_2_BRANCH/priv/ir/irdefs.c 2006-12-27 01:15:29 UTC (rev 1702)
+++ branches/VEX_3_2_BRANCH/priv/ir/irdefs.c 2006-12-27 04:21:05 UTC (rev 1703)
@@ -340,6 +340,7 @@
case Iop_ShlN32x2: vex_printf("ShlN32x2"); return;
case Iop_ShrN16x4: vex_printf("ShrN16x4"); return;
case Iop_ShrN32x2: vex_printf("ShrN32x2"); return;
+ case Iop_SarN8x8: vex_printf("SarN8x8"); return;
case Iop_SarN16x4: vex_printf("SarN16x4"); return;
case Iop_SarN32x2: vex_printf("SarN32x2"); return;
case Iop_QNarrow16Ux4: vex_printf("QNarrow16Ux4"); return;
@@ -1471,7 +1472,7 @@
 
case Iop_ShlN32x2: case Iop_ShlN16x4:
case Iop_ShrN32x2: case Iop_ShrN16x4:
- case Iop_SarN32x2: case Iop_SarN16x4:
+ case Iop_SarN32x2: case Iop_SarN16x4: case Iop_SarN8x8:
BINARY(Ity_I64,Ity_I8, Ity_I64);
 
case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
Modified: branches/VEX_3_2_BRANCH/pub/libvex_ir.h
===================================================================
--- branches/VEX_3_2_BRANCH/pub/libvex_ir.h 2006-12-27 01:15:29 UTC (rev 1702)
+++ branches/VEX_3_2_BRANCH/pub/libvex_ir.h 2006-12-27 04:21:05 UTC (rev 1703)
@@ -494,9 +494,9 @@
Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
 
/* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
- Iop_ShlN16x4, Iop_ShlN32x2,
- Iop_ShrN16x4, Iop_ShrN32x2,
- Iop_SarN16x4, Iop_SarN32x2,
+ Iop_ShlN16x4, Iop_ShlN32x2,
+ Iop_ShrN16x4, Iop_ShrN32x2,
+ Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
 
/* NARROWING -- narrow 2xI64 into 1xI64, hi half from left arg */
Iop_QNarrow16Ux4,
|