|
From: <sv...@va...> - 2005-05-09 17:53:04
|
Author: sewardj
Date: 2005-05-09 18:52:56 +0100 (Mon, 09 May 2005)
New Revision: 1173
Modified:
trunk/priv/guest-amd64/ghelpers.c
trunk/priv/guest-amd64/toIR.c
trunk/priv/host-amd64/isel.c
Log:
Make a whole bunch of mmx instructions work.
Modified: trunk/priv/guest-amd64/ghelpers.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/ghelpers.c 2005-05-09 12:16:33 UTC (rev 1172)
+++ trunk/priv/guest-amd64/ghelpers.c 2005-05-09 17:52:56 UTC (rev 1173)
@@ -1383,7 +1383,44 @@
}
=20
=20
+/*---------------------------------------------------------------*/
+/*--- Helpers for MMX/SSE/SSE2. ---*/
+/*---------------------------------------------------------------*/
=20
+static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
+ return (((ULong)w1) << 32) | ((ULong)w0);
+}
+
+static inline UShort sel16x4_3 ( ULong w64 ) {
+ UInt hi32 =3D toUInt(w64 >> 32);
+ return toUShort(hi32 >> 16);
+}
+static inline UShort sel16x4_2 ( ULong w64 ) {
+ UInt hi32 =3D toUInt(w64 >> 32);
+ return toUShort(hi32);
+}
+static inline UShort sel16x4_1 ( ULong w64 ) {
+ UInt lo32 =3D toUInt(w64);
+ return toUShort(lo32 >> 16);
+}
+static inline UShort sel16x4_0 ( ULong w64 ) {
+ UInt lo32 =3D toUInt(w64);
+ return toUShort(lo32);
+}
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
+{
+ return
+ mk32x2(
+ (((Int)(Short)sel16x4_3(xx)) * ((Int)(Short)sel16x4_3(yy)))
+ + (((Int)(Short)sel16x4_2(xx)) * ((Int)(Short)sel16x4_2(yy))=
),
+ (((Int)(Short)sel16x4_1(xx)) * ((Int)(Short)sel16x4_1(yy)))
+ + (((Int)(Short)sel16x4_0(xx)) * ((Int)(Short)sel16x4_0(yy))=
)
+ );
+}
+
+
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing, ---*/
/*--- guest state as a whole. ---*/
Modified: trunk/priv/guest-amd64/toIR.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/toIR.c 2005-05-09 12:16:33 UTC (rev 1172)
+++ trunk/priv/guest-amd64/toIR.c 2005-05-09 17:52:56 UTC (rev 1173)
@@ -667,6 +667,13 @@
return (Int)( (mod_reg_rm >> 3) & 7 );
}
=20
+/* Ditto the 'e' field of a modRM byte. */
+inline
+static Int eregLO3ofRM ( UChar mod_reg_rm )
+{
+ return (Int)(mod_reg_rm & 0x7);
+}
+
/* Get a 8/16/32-bit unsigned value out of the insn stream. */
=20
static UChar getUChar ( ULong delta )
@@ -881,6 +888,12 @@
toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) =3D=3D 0);
}
=20
+/* Return True iff pfx has any of 66, F2 and F3 set */
+static Bool have66orF2orF3 ( Prefix pfx )
+{
+ return ! haveNo66noF2noF3(pfx);
+}
+
/* Clear all the segment-override bits in a prefix. */
static Prefix clearSegBits ( Prefix p )
{
@@ -1968,15 +1981,15 @@
//.. default: vpanic("nameSReg(x86)");
//.. }
//.. }
-//..=20
-//.. static HChar* nameMMXReg ( Int mmxreg )
-//.. {
-//.. static HChar* mmx_names[8]=20
-//.. =3D { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", =
"%mm7" };
-//.. if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(x86,guest)");
-//.. return mmx_names[mmxreg];
-//.. }
=20
+static HChar* nameMMXReg ( Int mmxreg )
+{
+ static HChar* mmx_names[8]=20
+ =3D { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7=
" };
+ if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
+ return mmx_names[mmxreg];
+}
+
static HChar* nameXMMReg ( Int xmmreg )
{
static HChar* xmm_names[16]=20
@@ -1988,16 +2001,16 @@
return xmm_names[xmmreg];
}
=20
-//.. static Char* nameMMXGran ( UChar gran )
-//.. {
-//.. switch (gran) {
-//.. case 0: return "b";
-//.. case 1: return "w";
-//.. case 2: return "d";
-//.. case 3: return "q";
-//.. default: vpanic("nameMMXGran(x86,guest)");
-//.. }
-//.. }
+static HChar* nameMMXGran ( UChar gran )
+{
+ switch (gran) {
+ case 0: return "b";
+ case 1: return "w";
+ case 2: return "d";
+ case 3: return "q";
+ default: vpanic("nameMMXGran(amd64,guest)");
+ }
+}
=20
static HChar nameISize ( Int size )
{
@@ -5790,288 +5803,288 @@
}
=20
=20
-//.. /*------------------------------------------------------------*/
-//.. /*--- ---*/
-//.. /*--- MMX INSTRUCTIONS ---*/
-//.. /*--- ---*/
-//.. /*------------------------------------------------------------*/
-//..=20
-//.. /* Effect of MMX insns on x87 FPU state (table 11-2 of=20
-//.. IA32 arch manual, volume 3):
-//..=20
-//.. Read from, or write to MMX register (viz, any insn except EMMS):
-//.. * All tags set to Valid (non-empty) -- FPTAGS[i] :=3D nonzero
-//.. * FP stack pointer set to zero
-//..=20
-//.. EMMS:
-//.. * All tags set to Invalid (empty) -- FPTAGS[i] :=3D zero
-//.. * FP stack pointer set to zero
-//.. */
-//..=20
-//.. static void do_MMX_preamble ( void )
-//.. {
-//.. Int i;
-//.. IRArray* descr =3D mkIRArray( OFFB_FPTAGS, Ity_I8, 8 );
-//.. IRExpr* zero =3D mkU32(0);
-//.. IRExpr* tag1 =3D mkU8(1);
-//.. put_ftop(zero);
-//.. for (i =3D 0; i < 8; i++)
-//.. stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
-//.. }
-//..=20
-//.. static void do_EMMS_preamble ( void )
-//.. {
-//.. Int i;
-//.. IRArray* descr =3D mkIRArray( OFFB_FPTAGS, Ity_I8, 8 );
-//.. IRExpr* zero =3D mkU32(0);
-//.. IRExpr* tag0 =3D mkU8(0);
-//.. put_ftop(zero);
-//.. for (i =3D 0; i < 8; i++)
-//.. stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
-//.. }
-//..=20
-//..=20
-//.. static IRExpr* getMMXReg ( UInt archreg )
-//.. {
-//.. vassert(archreg < 8);
-//.. return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
-//.. }
-//..=20
-//..=20
-//.. static void putMMXReg ( UInt archreg, IRExpr* e )
-//.. {
-//.. vassert(archreg < 8);
-//.. vassert(typeOfIRExpr(irbb->tyenv,e) =3D=3D Ity_I64);
-//.. stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
-//.. }
-//..=20
-//..=20
-//.. /* Helper for non-shift MMX insns. Note this is incomplete in the
-//.. sense that it does not first call do_MMX_preamble() -- that is t=
he
-//.. responsibility of its caller. */
-//..=20
-//.. static=20
-//.. UInt dis_MMXop_regmem_to_reg ( UChar sorb,
-//.. UInt delta,
-//.. UChar opc,
-//.. Char* name,
-//.. Bool show_granularity )
-//.. {
-//.. HChar dis_buf[50];
-//.. UChar modrm =3D getUChar(delta);
-//.. Bool isReg =3D epartIsReg(modrm);
-//.. IRExpr* argL =3D NULL;
-//.. IRExpr* argR =3D NULL;
-//.. IRExpr* argG =3D NULL;
-//.. IRExpr* argE =3D NULL;
-//.. IRTemp res =3D newTemp(Ity_I64);
-//..=20
-//.. Bool invG =3D False;
-//.. IROp op =3D Iop_INVALID;
-//.. void* hAddr =3D NULL;
-//.. Char* hName =3D NULL;
-//.. Bool eLeft =3D False;
-//..=20
-//.. # define XXX(_name) do { hAddr =3D &_name; hName =3D #_name; } whi=
le (0)
-//..=20
-//.. switch (opc) {
-//.. /* Original MMX ones */
-//.. case 0xFC: op =3D Iop_Add8x8; break;
-//.. case 0xFD: op =3D Iop_Add16x4; break;
-//.. case 0xFE: op =3D Iop_Add32x2; break;
-//..=20
-//.. case 0xEC: op =3D Iop_QAdd8Sx8; break;
-//.. case 0xED: op =3D Iop_QAdd16Sx4; break;
-//..=20
-//.. case 0xDC: op =3D Iop_QAdd8Ux8; break;
-//.. case 0xDD: op =3D Iop_QAdd16Ux4; break;
-//..=20
-//.. case 0xF8: op =3D Iop_Sub8x8; break;
-//.. case 0xF9: op =3D Iop_Sub16x4; break;
-//.. case 0xFA: op =3D Iop_Sub32x2; break;
-//..=20
-//.. case 0xE8: op =3D Iop_QSub8Sx8; break;
-//.. case 0xE9: op =3D Iop_QSub16Sx4; break;
-//..=20
-//.. case 0xD8: op =3D Iop_QSub8Ux8; break;
-//.. case 0xD9: op =3D Iop_QSub16Ux4; break;
-//..=20
-//.. case 0xE5: op =3D Iop_MulHi16Sx4; break;
-//.. case 0xD5: op =3D Iop_Mul16x4; break;
-//.. case 0xF5: XXX(x86g_calculate_mmx_pmaddwd); break;
-//..=20
-//.. case 0x74: op =3D Iop_CmpEQ8x8; break;
-//.. case 0x75: op =3D Iop_CmpEQ16x4; break;
-//.. case 0x76: op =3D Iop_CmpEQ32x2; break;
-//..=20
-//.. case 0x64: op =3D Iop_CmpGT8Sx8; break;
-//.. case 0x65: op =3D Iop_CmpGT16Sx4; break;
-//.. case 0x66: op =3D Iop_CmpGT32Sx2; break;
-//..=20
-//.. case 0x6B: op =3D Iop_QNarrow32Sx2; eLeft =3D True; break;
-//.. case 0x63: op =3D Iop_QNarrow16Sx4; eLeft =3D True; break;
-//.. case 0x67: op =3D Iop_QNarrow16Ux4; eLeft =3D True; break;
-//..=20
-//.. case 0x68: op =3D Iop_InterleaveHI8x8; eLeft =3D True; break=
;
-//.. case 0x69: op =3D Iop_InterleaveHI16x4; eLeft =3D True; break=
;
-//.. case 0x6A: op =3D Iop_InterleaveHI32x2; eLeft =3D True; break=
;
-//..=20
-//.. case 0x60: op =3D Iop_InterleaveLO8x8; eLeft =3D True; break=
;
-//.. case 0x61: op =3D Iop_InterleaveLO16x4; eLeft =3D True; break=
;
-//.. case 0x62: op =3D Iop_InterleaveLO32x2; eLeft =3D True; break=
;
-//..=20
-//.. case 0xDB: op =3D Iop_And64; break;
-//.. case 0xDF: op =3D Iop_And64; invG =3D True; break;
-//.. case 0xEB: op =3D Iop_Or64; break;
-//.. case 0xEF: /* Possibly do better here if argL and argR are th=
e
-//.. same reg */
-//.. op =3D Iop_Xor64; break;
-//..=20
-//.. /* Introduced in SSE1 */
-//.. case 0xE0: op =3D Iop_Avg8Ux8; break;
-//.. case 0xE3: op =3D Iop_Avg16Ux4; break;
-//.. case 0xEE: op =3D Iop_Max16Sx4; break;
-//.. case 0xDE: op =3D Iop_Max8Ux8; break;
-//.. case 0xEA: op =3D Iop_Min16Sx4; break;
-//.. case 0xDA: op =3D Iop_Min8Ux8; break;
-//.. case 0xE4: op =3D Iop_MulHi16Ux4; break;
-//.. case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;
-//..=20
-//.. /* Introduced in SSE2 */
-//.. case 0xD4: op =3D Iop_Add64; break;
-//.. case 0xFB: op =3D Iop_Sub64; break;
-//..=20
-//.. default:=20
-//.. vex_printf("\n0x%x\n", (Int)opc);
-//.. vpanic("dis_MMXop_regmem_to_reg");
-//.. }
-//..=20
-//.. # undef XXX
-//..=20
-//.. argG =3D getMMXReg(gregOfRM(modrm));
-//.. if (invG)
-//.. argG =3D unop(Iop_Not64, argG);
-//..=20
-//.. if (isReg) {
-//.. delta++;
-//.. argE =3D getMMXReg(eregOfRM(modrm));
-//.. } else {
-//.. Int len;
-//.. IRTemp addr =3D disAMode( &len, sorb, delta, dis_buf );
-//.. delta +=3D len;
-//.. argE =3D loadLE(Ity_I64, mkexpr(addr));
-//.. }
-//..=20
-//.. if (eLeft) {
-//.. argL =3D argE;
-//.. argR =3D argG;
-//.. } else {
-//.. argL =3D argG;
-//.. argR =3D argE;
-//.. }
-//..=20
-//.. if (op !=3D Iop_INVALID) {
-//.. vassert(hName =3D=3D NULL);
-//.. vassert(hAddr =3D=3D NULL);
-//.. assign(res, binop(op, argL, argR));
-//.. } else {
-//.. vassert(hName !=3D NULL);
-//.. vassert(hAddr !=3D NULL);
-//.. assign( res,=20
-//.. mkIRExprCCall(
-//.. Ity_I64,=20
-//.. 0/*regparms*/, hName, hAddr,
-//.. mkIRExprVec_2( argL, argR )
-//.. )=20
-//.. );
-//.. }
-//..=20
-//.. putMMXReg( gregOfRM(modrm), mkexpr(res) );
-//..=20
-//.. DIP("%s%s %s, %s\n",=20
-//.. name, show_granularity ? nameMMXGran(opc & 3) : (Char*)"",
-//.. ( isReg ? nameMMXReg(eregOfRM(modrm)) : dis_buf ),
-//.. nameMMXReg(gregOfRM(modrm)) );
-//..=20
-//.. return delta;
-//.. }
-//..=20
-//..=20
-//.. /* Vector by scalar shift of G by the amount specified at the botto=
m
-//.. of E. This is a straight copy of dis_SSE_shiftG_byE. */
-//..=20
-//.. static UInt dis_MMX_shiftG_byE ( UChar sorb, ULong delta,=20
-//.. HChar* opname, IROp op )
-//.. {
-//.. HChar dis_buf[50];
-//.. Int alen, size;
-//.. IRTemp addr;
-//.. Bool shl, shr, sar;
-//.. UChar rm =3D getUChar(delta);
-//.. IRTemp g0 =3D newTemp(Ity_I64);
-//.. IRTemp g1 =3D newTemp(Ity_I64);
-//.. IRTemp amt =3D newTemp(Ity_I32);
-//.. IRTemp amt8 =3D newTemp(Ity_I8);
-//..=20
-//.. if (epartIsReg(rm)) {
-//.. assign( amt, unop(Iop_64to32, getMMXReg(eregOfRM(rm))) );
-//.. DIP("%s %s,%s\n", opname,
-//.. nameMMXReg(eregOfRM(rm)),
-//.. nameMMXReg(gregOfRM(rm)) );
-//.. delta++;
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta, dis_buf );
-//.. assign( amt, loadLE(Ity_I32, mkexpr(addr)) );
-//.. DIP("%s %s,%s\n", opname,
-//.. dis_buf,
-//.. nameMMXReg(gregOfRM(rm)) );
-//.. delta +=3D alen;
-//.. }
-//.. assign( g0, getMMXReg(gregOfRM(rm)) );
-//.. assign( amt8, unop(Iop_32to8, mkexpr(amt)) );
-//..=20
-//.. shl =3D shr =3D sar =3D False;
-//.. size =3D 0;
-//.. switch (op) {
-//.. case Iop_ShlN16x4: shl =3D True; size =3D 32; break;
-//.. case Iop_ShlN32x2: shl =3D True; size =3D 32; break;
-//.. case Iop_Shl64: shl =3D True; size =3D 64; break;
-//.. case Iop_ShrN16x4: shr =3D True; size =3D 16; break;
-//.. case Iop_ShrN32x2: shr =3D True; size =3D 32; break;
-//.. case Iop_Shr64: shr =3D True; size =3D 64; break;
-//.. case Iop_SarN16x4: sar =3D True; size =3D 16; break;
-//.. case Iop_SarN32x2: sar =3D True; size =3D 32; break;
-//.. default: vassert(0);
-//.. }
-//..=20
-//.. if (shl || shr) {
-//.. assign(=20
-//.. g1,
-//.. IRExpr_Mux0X(
-//.. unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size=
))),
-//.. mkU64(0),
-//.. binop(op, mkexpr(g0), mkexpr(amt8))
-//.. )
-//.. );
-//.. } else=20
-//.. if (sar) {
-//.. assign(=20
-//.. g1,
-//.. IRExpr_Mux0X(
-//.. unop(Iop_1Uto8,binop(Iop_CmpLT32U,mkexpr(amt),mkU32(size=
))),
-//.. binop(op, mkexpr(g0), mkU8(size-1)),
-//.. binop(op, mkexpr(g0), mkexpr(amt8))
-//.. )
-//.. );
-//.. } else {
-//.. vassert(0);
-//.. }
-//..=20
-//.. putMMXReg( gregOfRM(rm), mkexpr(g1) );
-//.. return delta;
-//.. }
-//..=20
-//..=20
+/*------------------------------------------------------------*/
+/*--- ---*/
+/*--- MMX INSTRUCTIONS ---*/
+/*--- ---*/
+/*------------------------------------------------------------*/
+
+/* Effect of MMX insns on x87 FPU state (table 11-2 of=20
+ IA32 arch manual, volume 3):
+
+ Read from, or write to MMX register (viz, any insn except EMMS):
+ * All tags set to Valid (non-empty) -- FPTAGS[i] :=3D nonzero
+ * FP stack pointer set to zero
+
+ EMMS:
+ * All tags set to Invalid (empty) -- FPTAGS[i] :=3D zero
+ * FP stack pointer set to zero
+*/
+
+static void do_MMX_preamble ( void )
+{
+ Int i;
+ IRArray* descr =3D mkIRArray( OFFB_FPTAGS, Ity_I8, 8 );
+ IRExpr* zero =3D mkU32(0);
+ IRExpr* tag1 =3D mkU8(1);
+ put_ftop(zero);
+ for (i =3D 0; i < 8; i++)
+ stmt( IRStmt_PutI( descr, zero, i, tag1 ) );
+}
+
+static void do_EMMS_preamble ( void )
+{
+ Int i;
+ IRArray* descr =3D mkIRArray( OFFB_FPTAGS, Ity_I8, 8 );
+ IRExpr* zero =3D mkU32(0);
+ IRExpr* tag0 =3D mkU8(0);
+ put_ftop(zero);
+ for (i =3D 0; i < 8; i++)
+ stmt( IRStmt_PutI( descr, zero, i, tag0 ) );
+}
+
+
+static IRExpr* getMMXReg ( UInt archreg )
+{
+ vassert(archreg < 8);
+ return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
+}
+
+
+static void putMMXReg ( UInt archreg, IRExpr* e )
+{
+ vassert(archreg < 8);
+ vassert(typeOfIRExpr(irbb->tyenv,e) =3D=3D Ity_I64);
+ stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
+}
+
+
+/* Helper for non-shift MMX insns. Note this is incomplete in the
+ sense that it does not first call do_MMX_preamble() -- that is the
+ responsibility of its caller. */
+
+static=20
+ULong dis_MMXop_regmem_to_reg ( Prefix pfx,
+ ULong delta,
+ UChar opc,
+ Char* name,
+ Bool show_granularity )
+{
+ HChar dis_buf[50];
+ UChar modrm =3D getUChar(delta);
+ Bool isReg =3D epartIsReg(modrm);
+ IRExpr* argL =3D NULL;
+ IRExpr* argR =3D NULL;
+ IRExpr* argG =3D NULL;
+ IRExpr* argE =3D NULL;
+ IRTemp res =3D newTemp(Ity_I64);
+
+ Bool invG =3D False;
+ IROp op =3D Iop_INVALID;
+ void* hAddr =3D NULL;
+ Char* hName =3D NULL;
+ Bool eLeft =3D False;
+
+# define XXX(_name) do { hAddr =3D &_name; hName =3D #_name; } while (0=
)
+
+ switch (opc) {
+ /* Original MMX ones */
+ case 0xFC: op =3D Iop_Add8x8; break;
+ case 0xFD: op =3D Iop_Add16x4; break;
+ case 0xFE: op =3D Iop_Add32x2; break;
+
+ case 0xEC: op =3D Iop_QAdd8Sx8; break;
+ case 0xED: op =3D Iop_QAdd16Sx4; break;
+
+ case 0xDC: op =3D Iop_QAdd8Ux8; break;
+ case 0xDD: op =3D Iop_QAdd16Ux4; break;
+
+ case 0xF8: op =3D Iop_Sub8x8; break;
+ case 0xF9: op =3D Iop_Sub16x4; break;
+ case 0xFA: op =3D Iop_Sub32x2; break;
+
+ case 0xE8: op =3D Iop_QSub8Sx8; break;
+ case 0xE9: op =3D Iop_QSub16Sx4; break;
+
+ case 0xD8: op =3D Iop_QSub8Ux8; break;
+ case 0xD9: op =3D Iop_QSub16Ux4; break;
+
+ case 0xE5: op =3D Iop_MulHi16Sx4; break;
+ case 0xD5: op =3D Iop_Mul16x4; break;
+ case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
+
+ case 0x74: op =3D Iop_CmpEQ8x8; break;
+ case 0x75: op =3D Iop_CmpEQ16x4; break;
+ case 0x76: op =3D Iop_CmpEQ32x2; break;
+
+ case 0x64: op =3D Iop_CmpGT8Sx8; break;
+ case 0x65: op =3D Iop_CmpGT16Sx4; break;
+ case 0x66: op =3D Iop_CmpGT32Sx2; break;
+
+ case 0x6B: op =3D Iop_QNarrow32Sx2; eLeft =3D True; break;
+ case 0x63: op =3D Iop_QNarrow16Sx4; eLeft =3D True; break;
+ case 0x67: op =3D Iop_QNarrow16Ux4; eLeft =3D True; break;
+
+ case 0x68: op =3D Iop_InterleaveHI8x8; eLeft =3D True; break;
+ case 0x69: op =3D Iop_InterleaveHI16x4; eLeft =3D True; break;
+ case 0x6A: op =3D Iop_InterleaveHI32x2; eLeft =3D True; break;
+
+ case 0x60: op =3D Iop_InterleaveLO8x8; eLeft =3D True; break;
+ case 0x61: op =3D Iop_InterleaveLO16x4; eLeft =3D True; break;
+ case 0x62: op =3D Iop_InterleaveLO32x2; eLeft =3D True; break;
+
+ case 0xDB: op =3D Iop_And64; break;
+ case 0xDF: op =3D Iop_And64; invG =3D True; break;
+ case 0xEB: op =3D Iop_Or64; break;
+ case 0xEF: /* Possibly do better here if argL and argR are the
+ same reg */
+ op =3D Iop_Xor64; break;
+
+ /* Introduced in SSE1 */
+ case 0xE0: op =3D Iop_Avg8Ux8; break;
+ case 0xE3: op =3D Iop_Avg16Ux4; break;
+ case 0xEE: op =3D Iop_Max16Sx4; break;
+ case 0xDE: op =3D Iop_Max8Ux8; break;
+ case 0xEA: op =3D Iop_Min16Sx4; break;
+ case 0xDA: op =3D Iop_Min8Ux8; break;
+ case 0xE4: op =3D Iop_MulHi16Ux4; break;
+ // case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;
+
+ /* Introduced in SSE2 */
+ case 0xD4: op =3D Iop_Add64; break;
+ case 0xFB: op =3D Iop_Sub64; break;
+
+ default:=20
+ vex_printf("\n0x%x\n", (Int)opc);
+ vpanic("dis_MMXop_regmem_to_reg");
+ }
+
+# undef XXX
+
+ argG =3D getMMXReg(gregLO3ofRM(modrm));
+ if (invG)
+ argG =3D unop(Iop_Not64, argG);
+
+ if (isReg) {
+ delta++;
+ argE =3D getMMXReg(eregLO3ofRM(modrm));
+ } else {
+ Int len;
+ IRTemp addr =3D disAMode( &len, pfx, delta, dis_buf, 0 );
+ delta +=3D len;
+ argE =3D loadLE(Ity_I64, mkexpr(addr));
+ }
+
+ if (eLeft) {
+ argL =3D argE;
+ argR =3D argG;
+ } else {
+ argL =3D argG;
+ argR =3D argE;
+ }
+
+ if (op !=3D Iop_INVALID) {
+ vassert(hName =3D=3D NULL);
+ vassert(hAddr =3D=3D NULL);
+ assign(res, binop(op, argL, argR));
+ } else {
+ vassert(hName !=3D NULL);
+ vassert(hAddr !=3D NULL);
+ assign( res,=20
+ mkIRExprCCall(
+ Ity_I64,=20
+ 0/*regparms*/, hName, hAddr,
+ mkIRExprVec_2( argL, argR )
+ )=20
+ );
+ }
+
+ putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
+
+ DIP("%s%s %s, %s\n",=20
+ name, show_granularity ? nameMMXGran(opc & 3) : "",
+ ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
+ nameMMXReg(gregLO3ofRM(modrm)) );
+
+ return delta;
+}
+
+
+/* Vector by scalar shift of G by the amount specified at the bottom
+ of E. This is a straight copy of dis_SSE_shiftG_byE. */
+
+static ULong dis_MMX_shiftG_byE ( Prefix pfx, ULong delta,=20
+ HChar* opname, IROp op )
+{
+ HChar dis_buf[50];
+ Int alen, size;
+ IRTemp addr;
+ Bool shl, shr, sar;
+ UChar rm =3D getUChar(delta);
+ IRTemp g0 =3D newTemp(Ity_I64);
+ IRTemp g1 =3D newTemp(Ity_I64);
+ IRTemp amt =3D newTemp(Ity_I64);
+ IRTemp amt8 =3D newTemp(Ity_I8);
+
+ if (epartIsReg(rm)) {
+ assign( amt, getMMXReg(eregLO3ofRM(rm)) );
+ DIP("%s %s,%s\n", opname,
+ nameMMXReg(eregLO3ofRM(rm)),
+ nameMMXReg(gregLO3ofRM(rm)) );
+ delta++;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta, dis_buf, 0 );
+ assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(rm)) );
+ delta +=3D alen;
+ }
+ assign( g0, getMMXReg(gregLO3ofRM(rm)) );
+ assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
+
+ shl =3D shr =3D sar =3D False;
+ size =3D 0;
+ switch (op) {
+ case Iop_ShlN16x4: shl =3D True; size =3D 32; break;
+ case Iop_ShlN32x2: shl =3D True; size =3D 32; break;
+ case Iop_Shl64: shl =3D True; size =3D 64; break;
+ case Iop_ShrN16x4: shr =3D True; size =3D 16; break;
+ case Iop_ShrN32x2: shr =3D True; size =3D 32; break;
+ case Iop_Shr64: shr =3D True; size =3D 64; break;
+ case Iop_SarN16x4: sar =3D True; size =3D 16; break;
+ case Iop_SarN32x2: sar =3D True; size =3D 32; break;
+ default: vassert(0);
+ }
+
+ if (shl || shr) {
+ assign(=20
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
+ mkU64(0),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else=20
+ if (sar) {
+ assign(=20
+ g1,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size))),
+ binop(op, mkexpr(g0), mkU8(size-1)),
+ binop(op, mkexpr(g0), mkexpr(amt8))
+ )
+ );
+ } else {
+ vassert(0);
+ }
+
+ putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
+ return delta;
+}
+
+
//.. /* Vector by scalar shift of E by an immediate byte. This is a
//.. straight copy of dis_SSE_shiftE_imm. */
//..=20
@@ -6126,24 +6139,24 @@
//.. putMMXReg( eregOfRM(rm), mkexpr(e1) );
//.. return delta;
//.. }
-//..=20
-//..=20
-//.. /* Completely handle all MMX instructions except emms. */
-//..=20
-//.. static
-//.. UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, ULong delta )
-//.. {
-//.. Int len;
-//.. UChar modrm;
-//.. HChar dis_buf[50];
-//.. UChar opc =3D getUChar(delta);
-//.. delta++;
-//..=20
-//.. /* dis_MMX handles all insns except emms. */
-//.. do_MMX_preamble();
-//..=20
-//.. switch (opc) {
-//..=20
+
+
+/* Completely handle all MMX instructions except emms. */
+
+static
+ULong dis_MMX ( Bool* decode_ok, Prefix pfx, Int sz, ULong delta )
+{
+ Int len;
+ UChar modrm;
+ HChar dis_buf[50];
+ UChar opc =3D getUChar(delta);
+ delta++;
+
+ /* dis_MMX handles all insns except emms. */
+ do_MMX_preamble();
+
+ switch (opc) {
+
//.. case 0x6E:=20
//.. /* MOVD (src)ireg-or-mem (E), (dst)mmxreg (G)*/
//.. if (sz !=3D 4)=20
@@ -6188,201 +6201,200 @@
//.. DIP("movd %s, %s\n", nameMMXReg(gregOfRM(modrm)), dis_b=
uf);
//.. }
//.. break;
+
+ case 0x6F:
+ /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ modrm =3D getUChar(delta);
+ if (epartIsReg(modrm)) {
+ delta++;
+ putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm))=
);
+ DIP("movq %s, %s\n",=20
+ nameMMXReg(eregLO3ofRM(modrm)),=20
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ IRTemp addr =3D disAMode( &len, pfx, delta, dis_buf, 0 );
+ delta +=3D len;
+ putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr))=
);
+ DIP("movq %s, %s\n",=20
+ dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
+ }
+ break;
+
+ case 0x7F:
+ /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ modrm =3D getUChar(delta);
+ if (epartIsReg(modrm)) {
+ /* Fall through. The assembler doesn't appear to generate
+ these. */
+ goto mmx_decode_failure;
+ } else {
+ IRTemp addr =3D disAMode( &len, pfx, delta, dis_buf, 0 );
+ delta +=3D len;
+ storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
+ DIP("mov(nt)q %s, %s\n",=20
+ nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
+ }
+ break;
+
+ case 0xFC:=20
+ case 0xFD:=20
+ case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "padd", Tr=
ue );
+ break;
+
+ case 0xEC:=20
+ case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "padds", T=
rue );
+ break;
+
+ case 0xDC:=20
+ case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "paddus", =
True );
+ break;
+
+ case 0xF8:=20
+ case 0xF9:=20
+ case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "psub", Tr=
ue );
+ break;
+
+ case 0xE8:=20
+ case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "psubs", T=
rue );
+ break;
+
+ case 0xD8:=20
+ case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "psubus", =
True );
+ break;
+
+ case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "pmulhw", =
False );
+ break;
+
+ case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "pmullw", =
False );
+ break;
+
+ case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
+ vassert(sz =3D=3D 4);
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "pmaddwd",=
False );
+ break;
+
+ case 0x74:=20
+ case 0x75:=20
+ case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "pcmpeq", =
True );
+ break;
+
+ case 0x64:=20
+ case 0x65:=20
+ case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "pcmpgt", =
True );
+ break;
+
+ case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "packssdw"=
, False );
+ break;
+
+ case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "packsswb"=
, False );
+ break;
+
+ case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "packuswb"=
, False );
+ break;
+
+ case 0x68:=20
+ case 0x69:=20
+ case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "punpckh",=
True );
+ break;
+
+ case 0x60:=20
+ case 0x61:=20
+ case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "punpckl",=
True );
+ break;
+
+ case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "pand", Fa=
lse );
+ break;
+
+ case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "pandn", F=
alse );
+ break;
+
+ case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "por", Fal=
se );
+ break;
+
+ case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
+ if (sz !=3D 4)=20
+ goto mmx_decode_failure;
+ delta =3D dis_MMXop_regmem_to_reg ( pfx, delta, opc, "pxor", Fa=
lse );
+ break;=20
+
+# define SHIFT_BY_REG(_name,_op) \
+ delta =3D dis_MMX_shiftG_byE(pfx, delta, _name, _op); \
+ break;
+
+ /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
+ case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
+ case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
+
+ /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
+ case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
+ case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
+
+ /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
+ case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
+
+# undef SHIFT_BY_REG
//..=20
-//.. case 0x6F:
-//.. /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. modrm =3D getUChar(delta);
-//.. if (epartIsReg(modrm)) {
-//.. delta++;
-//.. putMMXReg( gregOfRM(modrm), getMMXReg(eregOfRM(modrm)) =
);
-//.. DIP("movq %s, %s\n",=20
-//.. nameMMXReg(eregOfRM(modrm)), nameMMXReg(gregOfRM(mo=
drm)));
-//.. } else {
-//.. IRTemp addr =3D disAMode( &len, sorb, delta, dis_buf );
-//.. delta +=3D len;
-//.. putMMXReg( gregOfRM(modrm), loadLE(Ity_I64, mkexpr(addr=
)) );
-//.. DIP("movq %s, %s\n",=20
-//.. dis_buf, nameMMXReg(gregOfRM(modrm)));
-//.. }
-//.. break;
-//..=20
-//.. case 0x7F:
-//.. /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. modrm =3D getUChar(delta);
-//.. if (epartIsReg(modrm)) {
-//.. /* Fall through. The assembler doesn't appear to gener=
ate
-//.. these. */
-//.. goto mmx_decode_failure;
-//.. } else {
-//.. IRTemp addr =3D disAMode( &len, sorb, delta, dis_buf );
-//.. delta +=3D len;
-//.. storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
-//.. DIP("mov(nt)q %s, %s\n",=20
-//.. nameMMXReg(gregOfRM(modrm)), dis_buf);
-//.. }
-//.. break;
-//..=20
-//.. case 0xFC:=20
-//.. case 0xFD:=20
-//.. case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pad=
d", True );
-//.. break;
-//..=20
-//.. case 0xEC:=20
-//.. case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pad=
ds", True );
-//.. break;
-//..=20
-//.. case 0xDC:=20
-//.. case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pad=
dus", True );
-//.. break;
-//..=20
-//.. case 0xF8:=20
-//.. case 0xF9:=20
-//.. case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psu=
b", True );
-//.. break;
-//..=20
-//.. case 0xE8:=20
-//.. case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psu=
bs", True );
-//.. break;
-//..=20
-//.. case 0xD8:=20
-//.. case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "psu=
bus", True );
-//.. break;
-//..=20
-//.. case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmu=
lhw", False );
-//.. break;
-//..=20
-//.. case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pmu=
llw", False );
-//.. break;
-//..=20
-//.. case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
-//.. vassert(sz =3D=3D 4);
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pma=
ddwd", False );
-//.. break;
-//..=20
-//.. case 0x74:=20
-//.. case 0x75:=20
-//.. case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcm=
peq", True );
-//.. break;
-//..=20
-//.. case 0x64:=20
-//.. case 0x65:=20
-//.. case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pcm=
pgt", True );
-//.. break;
-//..=20
-//.. case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pac=
kssdw", False );
-//.. break;
-//..=20
-//.. case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pac=
ksswb", False );
-//.. break;
-//..=20
-//.. case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pac=
kuswb", False );
-//.. break;
-//..=20
-//.. case 0x68:=20
-//.. case 0x69:=20
-//.. case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pun=
pckh", True );
-//.. break;
-//..=20
-//.. case 0x60:=20
-//.. case 0x61:=20
-//.. case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pun=
pckl", True );
-//.. break;
-//..=20
-//.. case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pan=
d", False );
-//.. break;
-//..=20
-//.. case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pan=
dn", False );
-//.. break;
-//..=20
-//.. case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "por=
", False );
-//.. break;
-//..=20
-//.. case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
-//.. if (sz !=3D 4)=20
-//.. goto mmx_decode_failure;
-//.. delta =3D dis_MMXop_regmem_to_reg ( sorb, delta, opc, "pxo=
r", False );
-//.. break;=20
-//..=20
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define SHIFT_BY_REG(_name,_op) =
\
-/.. delta =3D dis_MMX_shiftG_byE(sorb, delta, _name, _op=
); \
-/.. break;
-#endif /* stop gcc multi-line comment warning */
-//..=20
-//.. /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
-//.. case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
-//.. case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
-//..=20
-//.. /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
-//.. case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
-//.. case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
-//..=20
-//.. /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
-//.. case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
-//..=20
-//.. # undef SHIFT_BY_REG
-//..=20
//.. case 0x71:=20
//.. case 0x72:=20
//.. case 0x73: {
@@ -6429,20 +6441,20 @@
//.. # undef SHIFT_BY_IMM
//.. break;
//.. }
-//..=20
-//.. /* --- MMX decode failure --- */
-//.. default:
-//.. mmx_decode_failure:
-//.. *decode_ok =3D False;
-//.. return delta; /* ignored */
-//..=20
-//.. }
-//..=20
-//.. *decode_ok =3D True;
-//.. return delta;
-//.. }
-//..=20
-//..=20
+
+ /* --- MMX decode failure --- */
+ default:
+ mmx_decode_failure:
+ *decode_ok =3D False;
+ return delta; /* ignored */
+
+ }
+
+ *decode_ok =3D True;
+ return delta;
+}
+
+
//.. /*------------------------------------------------------------*/
//.. /*--- More misc arithmetic and other obscure insns. ---*/
//.. /*------------------------------------------------------------*/
@@ -10499,16 +10511,17 @@
//.. "paddd", Iop_Add32x4, False );
//.. goto decode_success;
//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
-//.. /* 0F D4 =3D PADDQ -- add 64x1 */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD4) {
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "paddq", False );
-//.. goto decode_success;
-//.. }
=20
+ /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
+ /* 0F D4 =3D PADDQ -- add 64x1 */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD4) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "paddq", False );
+ goto decode_success;
+ }
+
/* 66 0F D4 =3D PADDQ */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD4) {
@@ -11201,16 +11214,17 @@
//.. "psubd", Iop_Sub32x4, False );
//.. goto decode_success;
//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
-//.. /* 0F FB =3D PSUBQ -- sub 64x1 */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFB) {
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "psubq", False );
-//.. goto decode_success;
-//.. }
=20
+ /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
+ /* 0F FB =3D PSUBQ -- sub 64x1 */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFB) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "psubq", False );
+ goto decode_success;
+ }
+
/* 66 0F FB =3D PSUBQ */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFB) {
@@ -11583,46 +11597,6 @@
goto decode_failure;
}
=20
-//.. /* ------------------------ INC & DEC ------------------ */
-//..=20
-//.. case 0x40: /* INC eAX */
-//.. case 0x41: /* INC eCX */
-//.. case 0x42: /* INC eDX */
-//.. case 0x43: /* INC eBX */
-//.. case 0x44: /* INC eSP */
-//.. case 0x45: /* INC eBP */
-//.. case 0x46: /* INC eSI */
-//.. case 0x47: /* INC eDI */
-//.. vassert(sz =3D=3D 2 || sz =3D=3D 4);
-//.. ty =3D szToITy(sz);
-//.. t1 =3D newTemp(ty);
-//.. assign( t1, binop(mkSizedOp(ty,Iop_Add8),
-//.. getIReg(sz, (UInt)(opc - 0x40)),
-//.. mkU(ty,1)) );
-//.. setFlags_INC_DEC( True, t1, ty );
-//.. putIReg(sz, (UInt)(opc - 0x40), mkexpr(t1));
-//.. DIP("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40));
-//.. break;
-//..=20
-//.. case 0x48: /* DEC eAX */
-//.. case 0x49: /* DEC eCX */
-//.. case 0x4A: /* DEC eDX */
-//.. case 0x4B: /* DEC eBX */
-//.. case 0x4C: /* DEC eSP */
-//.. case 0x4D: /* DEC eBP */
-//.. case 0x4E: /* DEC eSI */
-//.. case 0x4F: /* DEC eDI */
-//.. vassert(sz =3D=3D 2 || sz =3D=3D 4);
-//.. ty =3D szToITy(sz);
-//.. t1 =3D newTemp(ty);
-//.. assign( t1, binop(mkSizedOp(ty,Iop_Sub8),
-//.. getIReg(sz, (UInt)(opc - 0x48)),
-//.. mkU(ty,1)) );
-//.. setFlags_INC_DEC( False, t1, ty );
-//.. putIReg(sz, (UInt)(opc - 0x48), mkexpr(t1));
-//.. DIP("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48));
-//.. break;
-
/* ------------------------ Jcond, byte offset --------- */
=20
case 0xEB: /* Jb (jump, byte offset) */
@@ -11813,7 +11787,7 @@
//.. sorbTxt(sorb), d32);
//.. break;
=20
-/* XXXX be careful here with moves to AH/BH/CH/DH */
+ /* XXXX be careful here with moves to AH/BH/CH/DH */
case 0xB0: /* MOV imm,AL */
case 0xB1: /* MOV imm,CL */
case 0xB2: /* MOV imm,DL */
@@ -13301,101 +13275,103 @@
break;
}
=20
-//.. /* =3D-=3D-=3D-=3D-=3D-=3D-=3D-=3D-=3D- MMXery =3D-=3D-=3D-=3D=
-=3D-=3D-=3D-=3D-=3D-=3D-=3D */
-//..=20
+ /* =3D-=3D-=3D-=3D-=3D-=3D-=3D-=3D-=3D- MMXery =3D-=3D-=3D-=3D-=3D=
-=3D-=3D-=3D-=3D-=3D-=3D */
+
//.. case 0x71:=20
//.. case 0x72:=20
//.. case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
//..=20
//.. case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
//.. case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
-//.. case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
-//.. case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xFC:=20
-//.. case 0xFD:=20
-//.. case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xEC:=20
-//.. case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xDC:
-//.. case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xF8:=20
-//.. case 0xF9:=20
-//.. case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xE8:=20
-//.. case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xD8:=20
-//.. case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0x74:=20
-//.. case 0x75:=20
-//.. case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0x64:=20
-//.. case 0x65:=20
-//.. case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0x68:=20
-//.. case 0x69:=20
-//.. case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0x60:=20
-//.. case 0x61:=20
-//.. case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
-//..=20
-//.. case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xF2:=20
-//.. case 0xF3:=20
-//..=20
-//.. case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xD2:=20
-//.. case 0xD3:=20
-//..=20
-//.. case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
-//.. case 0xE2:=20
-//.. {
-//.. ULong delta0 =3D delta-1;
-//.. Bool decode_OK =3D False;
-//..=20
-//.. /* If sz=3D=3D2 this is SSE, and we assume sse idec has
-//.. already spotted those cases by now. */
-//.. if (sz !=3D 4)
-//.. goto decode_failure;
-//..=20
-//.. delta =3D dis_MMX ( &decode_OK, sorb, sz, delta-1 );
-//.. if (!decode_OK) {
-//.. delta =3D delta0;
-//.. goto decode_failure;
-//.. }
-//.. break;
-//.. }
-//..=20
-//.. case 0x77: /* EMMS */
-//.. if (sz !=3D 4)
-//.. goto decode_failure;
-//.. do_EMMS_preamble();
-//.. DIP("emms\n");
-//.. break;
+ case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
+ case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
=20
+ case 0xFC:=20
+ case 0xFD:=20
+ case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xEC:=20
+ case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xDC:
+ case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xF8:=20
+ case 0xF9:=20
+ case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xE8:=20
+ case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xD8:=20
+ case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x74:=20
+ case 0x75:=20
+ case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x64:=20
+ case 0x65:=20
+ case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x68:=20
+ case 0x69:=20
+ case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0x60:=20
+ case 0x61:=20
+ case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
+
+ case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xF2:=20
+ case 0xF3:=20
+
+ case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xD2:=20
+ case 0xD3:=20
+
+ case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
+ case 0xE2:=20
+ {
+ ULong delta0 =3D delta-1;
+ Bool decode_OK =3D False;
+
+ /* If sz=3D=3D2 this is SSE, and we assume sse idec has
+ already spotted those cases by now. */
+ if (sz !=3D 4)
+ goto decode_failure;
+ if (have66orF2orF3(pfx))
+ goto decode_failure;
+
+ delta =3D dis_MMX ( &decode_OK, pfx, sz, delta-1 );
+ if (!decode_OK) {
+ delta =3D delta0;
+ goto decode_failure;
+ }
+ break;
+ }
+
+ case 0x77: /* EMMS */
+ if (sz !=3D 4)
+ goto decode_failure;
+ do_EMMS_preamble();
+ DIP("emms\n");
+ break;
+
/* =3D-=3D-=3D-=3D-=3D-=3D-=3D-=3D-=3D- unimp2 =3D-=3D-=3D-=3D-=3D=
-=3D-=3D-=3D-=3D-=3D-=3D */
=20
default:
Modified: trunk/priv/host-amd64/isel.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/isel.c 2005-05-09 12:16:33 UTC (rev 1172)
+++ trunk/priv/host-amd64/isel.c 2005-05-09 17:52:56 UTC (rev 1173)
@@ -41,7 +41,7 @@
#include "main/vex_util.h"
#include "main/vex_globals.h"
#include "host-generic/h_generic_regs.h"
-//.. #include "host-generic/h_generic_simd64.h"
+#include "host-generic/h_generic_simd64.h"
#include "host-amd64/hdefs.h"
=20
=20
@@ -764,6 +764,7 @@
/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
+ Bool second_is_UInt;
MatchInfo mi;
DECLARE_PATTERN(p_8Uto64);
DECLARE_PATTERN(p_1Uto8_64to1);
@@ -806,6 +807,8 @@
case Iex_Binop: {
AMD64AluOp aluOp;
AMD64ShiftOp shOp;
+ HWord fn =3D 0; /* helper fn for most SIMD64 stuff */
+
//..=20
//.. /* Pattern: Sub32(0,x) */
//.. if (e->Iex.Binop.op =3D=3D Iop_Sub32 && isZero32(e->Iex.Binop=
.arg1)) {
@@ -919,6 +922,141 @@
return dst;
}
=20
+ /* Deal with 64-bit SIMD binary ops */
+ second_is_UInt =3D False;
+ switch (e->Iex.Binop.op) {
+ case Iop_Add8x8:
+ fn =3D (HWord)h_generic_calc_Add8x8; break;
+ case Iop_Add16x4:
+ fn =3D (HWord)h_generic_calc_Add16x4; break;
+ case Iop_Add32x2:
+ fn =3D (HWord)h_generic_calc_Add32x2; break;
+//..=20
+//.. case Iop_Avg8Ux8:
+//.. fn =3D (HWord)h_generic_calc_Avg8Ux8; break;
+//.. case Iop_Avg16Ux4:
+//.. fn =3D (HWord)h_generic_calc_Avg16Ux4; break;
+
+ case Iop_CmpEQ8x8:
+ fn =3D (HWord)h_generic_calc_CmpEQ8x8; break;
+ case Iop_CmpEQ16x4:
+ fn =3D (HWord)h_generic_calc_CmpEQ16x4; break;
+ case Iop_CmpEQ32x2:
+ fn =3D (HWord)h_generic_calc_CmpEQ32x2; break;
+
+ case Iop_CmpGT8Sx8:
+ fn =3D (HWord)h_generic_calc_CmpGT8Sx8; break;
+ case Iop_CmpGT16Sx4:
+ fn =3D (HWord)h_generic_calc_CmpGT16Sx4; break;
+ case Iop_CmpGT32Sx2:
+ fn =3D (HWord)h_generic_calc_CmpGT32Sx2; break;
+
+ case Iop_InterleaveHI8x8:
+ fn =3D (HWord)h_generic_calc_InterleaveHI8x8; break;
+ case Iop_InterleaveLO8x8:
+ fn =3D (HWord)h_generic_calc_InterleaveLO8x8; break;
+ case Iop_InterleaveHI16x4:
+ fn =3D (HWord)h_generic_calc_InterleaveHI16x4; break;
+ case Iop_InterleaveLO16x4:
+ fn =3D (HWord)h_generic_calc_InterleaveLO16x4; break;
+ case Iop_InterleaveHI32x2:
+ fn =3D (HWord)h_generic_calc_InterleaveHI32x2; break;
+ case Iop_InterleaveLO32x2:
+ fn =3D (HWord)h_generic_calc_InterleaveLO32x2; break;
+
+//.. case Iop_Max8Ux8:
+//.. fn =3D (HWord)h_generic_calc_Max8Ux8; break;
+//.. case Iop_Max16Sx4:
+//.. fn =3D (HWord)h_generic_calc_Max16Sx4; break;
+//.. case Iop_Min8Ux8:
+//.. fn =3D (HWord)h_generic_calc_Min8Ux8; break;
+//.. case Iop_Min16Sx4:
+//.. fn =3D (HWord)h_generic_calc_Min16Sx4; break;
+
+ case Iop_Mul16x4:
+ fn =3D (HWord)h_generic_calc_Mul16x4; break;
+ case Iop_MulHi16Sx4:
+ fn =3D (HWord)h_generic_calc_MulHi16Sx4; break;
+//.. case Iop_MulHi16Ux4:
+//.. fn =3D (HWord)h_generic_calc_MulHi16Ux4; break;
+//..=20
+ case Iop_QAdd8Sx8:
+ fn =3D (HWord)h_generic_calc_QAdd8Sx8; break;
+ case Iop_QAdd16Sx4:
+ fn =3D (HWord)h_generic_calc_QAdd16Sx4; break;
+ case Iop_QAdd8Ux8:
+ fn =3D (HWord)h_generic_calc_QAdd8Ux8; break;
+ case Iop_QAdd16Ux4:
+ fn =3D (HWord)h_generic_calc_QAdd16Ux4; break;
+
+ case Iop_QNarrow32Sx2:
+ fn =3D (HWord)h_generic_calc_QNarrow32Sx2; break;
+ case Iop_QNarrow16Sx4:
+ fn =3D (HWord)h_generic_calc_QNarrow16Sx4; break;
+ case Iop_QNarrow16Ux4:
+ fn =3D (HWord)h_generic_calc_QNarrow16Ux4; break;
+
+ case Iop_QSub8Sx8:
+ fn =3D (HWord)h_generic_calc_QSub8Sx8; break;
+ case Iop_QSub16Sx4:
+ fn =3D (HWord)h_generic_calc_QSub16Sx4; break;
+ case Iop_QSub8Ux8:
+ fn =3D (HWord)h_generic_calc_QSub8Ux8; break;
+ case Iop_QSub16Ux4:
+ fn =3D (HWord)h_generic_calc_QSub16Ux4; break;
+
+ case Iop_Sub8x8:
+ fn =3D (HWord)h_generic_calc_Sub8x8; break;
+ case Iop_Sub16x4:
+ fn =3D (HWord)h_generic_calc_Sub16x4; break;
+ case Iop_Sub32x2:
+ fn =3D (HWord)h_generic_calc_Sub32x2; break;
+
+ case Iop_ShlN32x2:
+ fn =3D (HWord)h_generic_calc_ShlN32x2;=20
+ second_is_UInt =3D True;
+ break;
+ case Iop_ShlN16x4:
+ fn =3D (HWord)h_generic_calc_ShlN16x4;
+ second_is_UInt =3D True;
+ break;
+ case Iop_ShrN32x2:
+ fn =3D (HWord)h_generic_calc_ShrN32x2;=20
+ second_is_UInt =3D True;=20
+ break;
+ case Iop_ShrN16x4:
+ fn =3D (HWord)h_generic_calc_ShrN16x4;
+ second_is_UInt =3D True;=20
+ break;
+ case Iop_SarN32x2:
+ fn =3D (HWord)h_generic_calc_SarN32x2;
+ second_is_UInt =3D True;=20
+ break;
+ case Iop_SarN16x4:
+ fn =3D (HWord)h_generic_calc_SarN16x4;
+ second_is_UInt =3D True;=20
+ break;
+
+ default:
+ fn =3D (HWord)0; break;
+ }
+ if (fn !=3D (HWord)0) {
+ /* Note: the following assumes all helpers are of signature=20
+ ULong fn ( ULong, ULong ), and they are
+ not marked as regparm functions.=20
+ */
+ HReg dst =3D newVRegI(env);
+ HReg argL =3D iselIntExpr_R(env, e->Iex.Binop.arg1);
+ HReg argR =3D iselIntExpr_R(env, e->Iex.Binop.arg2);
+ if (second_is_UInt)
+ addInstr(env, AMD64Instr_MovZLQ(argR, argR));
+ addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
+ addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
+ addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2 ));
+ addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
+ return dst;
+ }
+
/* Handle misc other ops. */
=20
if (e->Iex.Binop.op =3D=3D Iop_DivModS64to32
@@ -2320,152 +2458,6 @@
//.. return;
//.. }
//..=20
-//.. case Iop_Add8x8:
-//.. fn =3D (HWord)h_generic_calc_Add8x8; goto binnish;
-//.. case Iop_Add16x4:
-//.. fn =3D (HWord)h_generic_calc_Add16x4; goto binnish;
-//.. case Iop_Add32x2:
-//.. fn =3D (HWord)h_generic_calc_Add32x2; goto binnish;
-//..=20
-//.. case Iop_Avg8Ux8:
-//.. fn =3D (HWord)h_generic_calc_Avg8Ux8; goto binnish;
-//.. case Iop_Avg16Ux4:
-//.. fn =3D (HWord)h_generic_calc_Avg16Ux4; goto binnish;
-//..=20
-//.. case Iop_CmpEQ8x8:
-//.. fn =3D (HWord)h_generic_calc_CmpEQ8x8; goto binnish;
-//.. case Iop_CmpEQ16x4:
-//.. fn =3D (HWord)h_generic_calc_CmpEQ16x4; goto binnish;
-//.. case Iop_CmpEQ32x2:
-//.. fn =3D (HWord)h_generic_calc_CmpEQ32x2; goto binnish;
-//..=20
-//.. case Iop_CmpGT8Sx8:
-//.. fn =3D (HWord)h_generic_calc_CmpGT8Sx8; goto binnish;
-//.. case Iop_CmpGT16Sx4:
-//.. fn =3D (HWord)h_generic_calc_CmpGT16Sx4; goto binnish;
-//.. case Iop_CmpGT32Sx2:
-//.. fn =3D (HWord)h_generic_calc_CmpGT32Sx2; goto binnish;
-//..=20
-//.. case Iop_InterleaveHI8x8:
-//.. fn =3D (HWord)h_generic_calc_InterleaveHI8x8; goto binn=
ish;
-//.. case Iop_InterleaveLO8x8:
-//.. fn =3D (HWord)h_generic_calc_InterleaveLO8x8; goto binn=
ish;
-//.. case Iop_InterleaveHI16x4:
-//.. fn =3D (HWord)h_generic_calc_InterleaveHI16x4; goto bin=
nish;
-//.. case Iop_InterleaveLO16x4:
-//.. fn =3D (HWord)h_generic_calc_InterleaveLO16x4; goto bin=
nish;
-//.. case Iop_InterleaveHI32x2:
-//.. fn =3D (HWord)h_generic_calc_InterleaveHI32x2; goto bin=
nish;
-//.. case Iop_InterleaveLO32x2:
-//.. fn =3D (HWord)h_generi...
[truncated message content] |