|
From: <sv...@va...> - 2005-05-10 20:08:40
|
Author: sewardj
Date: 2005-05-10 21:08:34 +0100 (Tue, 10 May 2005)
New Revision: 1177
Modified:
trunk/priv/guest-amd64/gdefs.h
trunk/priv/guest-amd64/ghelpers.c
trunk/priv/guest-amd64/toIR.c
trunk/priv/host-amd64/hdefs.c
trunk/priv/host-amd64/isel.c
Log:
First pass through SSE1 instructions.
Modified: trunk/priv/guest-amd64/gdefs.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/gdefs.h 2005-05-10 02:50:05 UTC (rev 1176)
+++ trunk/priv/guest-amd64/gdefs.h 2005-05-10 20:08:34 UTC (rev 1177)
@@ -117,8 +117,8 @@
=20
extern ULong amd64g_calculate_mmx_pmaddwd ( ULong, ULong );
extern ULong amd64g_calculate_mmx_psadbw ( ULong, ULong );
-extern UInt amd64g_calculate_mmx_pmovmskb ( ULong );
-extern UInt amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo );
+extern ULong amd64g_calculate_mmx_pmovmskb ( ULong );
+extern ULong amd64g_calculate_sse_pmovmskb ( ULong w64hi, ULong w64lo );
=20
=20
/* --- DIRTY HELPERS --- */
Modified: trunk/priv/guest-amd64/ghelpers.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/ghelpers.c 2005-05-10 02:50:05 UTC (rev 1176)
+++ trunk/priv/guest-amd64/ghelpers.c 2005-05-10 20:08:34 UTC (rev 1177)
@@ -1387,6 +1387,10 @@
/*--- Helpers for MMX/SSE/SSE2. ---*/
/*---------------------------------------------------------------*/
=20
+static inline UChar abdU8 ( UChar xx, UChar yy ) {
+ return toUChar(xx>yy ? xx-yy : yy-xx);
+}
+
static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
return (((ULong)w1) << 32) | ((ULong)w0);
}
@@ -1408,6 +1412,39 @@
return toUShort(lo32);
}
=20
+static inline UChar sel8x8_7 ( ULong w64 ) {
+ UInt hi32 =3D toUInt(w64 >> 32);
+ return toUChar(hi32 >> 24);
+}
+static inline UChar sel8x8_6 ( ULong w64 ) {
+ UInt hi32 =3D toUInt(w64 >> 32);
+ return toUChar(hi32 >> 16);
+}
+static inline UChar sel8x8_5 ( ULong w64 ) {
+ UInt hi32 =3D toUInt(w64 >> 32);
+ return toUChar(hi32 >> 8);
+}
+static inline UChar sel8x8_4 ( ULong w64 ) {
+ UInt hi32 =3D toUInt(w64 >> 32);
+ return toUChar(hi32 >> 0);
+}
+static inline UChar sel8x8_3 ( ULong w64 ) {
+ UInt lo32 =3D toUInt(w64);
+ return toUChar(lo32 >> 24);
+}
+static inline UChar sel8x8_2 ( ULong w64 ) {
+ UInt lo32 =3D toUInt(w64);
+ return toUChar(lo32 >> 16);
+}
+static inline UChar sel8x8_1 ( ULong w64 ) {
+ UInt lo32 =3D toUInt(w64);
+ return toUChar(lo32 >> 8);
+}
+static inline UChar sel8x8_0 ( ULong w64 ) {
+ UInt lo32 =3D toUInt(w64);
+ return toUChar(lo32 >> 0);
+}
+
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
ULong amd64g_calculate_mmx_pmaddwd ( ULong xx, ULong yy )
{
@@ -1420,7 +1457,38 @@
);
}
=20
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong amd64g_calculate_mmx_pmovmskb ( ULong xx )
+{
+ ULong r =3D 0;
+ if (xx & (1ULL << (64-1))) r |=3D (1<<7);
+ if (xx & (1ULL << (56-1))) r |=3D (1<<6);
+ if (xx & (1ULL << (48-1))) r |=3D (1<<5);
+ if (xx & (1ULL << (40-1))) r |=3D (1<<4);
+ if (xx & (1ULL << (32-1))) r |=3D (1<<3);
+ if (xx & (1ULL << (24-1))) r |=3D (1<<2);
+ if (xx & (1ULL << (16-1))) r |=3D (1<<1);
+ if (xx & (1ULL << ( 8-1))) r |=3D (1<<0);
+ return r;
+}
=20
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+ULong amd64g_calculate_mmx_psadbw ( ULong xx, ULong yy )
+{
+ UInt t =3D 0;
+ t +=3D (UInt)abdU8( sel8x8_7(xx), sel8x8_7(yy) );
+ t +=3D (UInt)abdU8( sel8x8_6(xx), sel8x8_6(yy) );
+ t +=3D (UInt)abdU8( sel8x8_5(xx), sel8x8_5(yy) );
+ t +=3D (UInt)abdU8( sel8x8_4(xx), sel8x8_4(yy) );
+ t +=3D (UInt)abdU8( sel8x8_3(xx), sel8x8_3(yy) );
+ t +=3D (UInt)abdU8( sel8x8_2(xx), sel8x8_2(yy) );
+ t +=3D (UInt)abdU8( sel8x8_1(xx), sel8x8_1(yy) );
+ t +=3D (UInt)abdU8( sel8x8_0(xx), sel8x8_0(yy) );
+ t &=3D 0xFFFF;
+ return (ULong)t;
+}
+
+
/*---------------------------------------------------------------*/
/*--- Helpers for dealing with, and describing, ---*/
/*--- guest state as a whole. ---*/
Modified: trunk/priv/guest-amd64/toIR.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/toIR.c 2005-05-10 02:50:05 UTC (rev 1176)
+++ trunk/priv/guest-amd64/toIR.c 2005-05-10 20:08:34 UTC (rev 1177)
@@ -1200,6 +1200,22 @@
}
=20
=20
+/* Simplistic functions to deal with the lower quarters of integer
+ registers as a straightforward bank of 16 16-bit regs. */
+
+static IRExpr* getIReg16 ( UInt regno )
+{
+ vassert(!host_is_bigendian);
+ return IRExpr_Get( integerGuestReg64Offset(regno),
+ Ity_I16 );
+}
+
+static HChar* nameIReg16 ( UInt regno )
+{
+ return nameIReg( 2, regno, False );
+}
+
+
/* Sometimes what we know is a 3-bit register number, a REX byte, and
which field of the REX byte is to be used to extend to a 4-bit
number. These functions cater for that situation. =20
@@ -5948,7 +5964,7 @@
case 0xEA: op =3D Iop_Min16Sx4; break;
case 0xDA: op =3D Iop_Min8Ux8; break;
case 0xE4: op =3D Iop_MulHi16Ux4; break;
- // case 0xF6: XXX(x86g_calculate_mmx_psadbw); break;
+ case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
=20
/* Introduced in SSE2 */
case 0xD4: op =3D Iop_Add64; break;
@@ -7389,79 +7405,79 @@
}
=20
=20
-//.. /* All lanes unary SSE operation, G =3D op(E). */
-//..=20
-//.. static UInt dis_SSE_E_to_G_unary_all (=20
-//.. UChar sorb, ULong delta,=20
-//.. HChar* opname, IROp op
-//.. )
-//.. {
-//.. HChar dis_buf[50];
-//.. Int alen;
-//.. IRTemp addr;
-//.. UChar rm =3D getUChar(delta);
-//.. if (epartIsReg(rm)) {
-//.. putXMMReg( gregOfRM(rm),=20
-//.. unop(op, getXMMReg(eregOfRM(rm))) );
-//.. DIP("%s %s,%s\n", opname,
-//.. nameXMMReg(eregOfRM(rm)),
-//.. nameXMMReg(gregOfRM(rm)) );
-//.. return delta+1;
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta, dis_buf );
-//.. putXMMReg( gregOfRM(rm),=20
-//.. unop(op, loadLE(Ity_V128, mkexpr(addr))) );
-//.. DIP("%s %s,%s\n", opname,
-//.. dis_buf,
-//.. nameXMMReg(gregOfRM(rm)) );
-//.. return delta+alen;
-//.. }
-//.. }
-//..=20
-//..=20
-//.. /* Lowest 32-bit lane only unary SSE operation, G =3D op(E). */
-//..=20
-//.. static UInt dis_SSE_E_to_G_unary_lo32 (=20
-//.. UChar sorb, ULong delta,=20
-//.. HChar* opname, IROp op
-//.. )
-//.. {
-//.. /* First we need to get the old G value and patch the low 32 bit=
s
-//.. of the E operand into it. Then apply op and write back to G.=
*/
-//.. HChar dis_buf[50];
-//.. Int alen;
-//.. IRTemp addr;
-//.. UChar rm =3D getUChar(delta);
-//.. IRTemp oldG0 =3D newTemp(Ity_V128);
-//.. IRTemp oldG1 =3D newTemp(Ity_V128);
-//..=20
-//.. assign( oldG0, getXMMReg(gregOfRM(rm)) );
-//..=20
-//.. if (epartIsReg(rm)) {
-//.. assign( oldG1,=20
-//.. binop( Iop_Set128lo32,
-//.. mkexpr(oldG0),
-//.. getXMMRegLane32(eregOfRM(rm), 0)) );
-//.. putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
-//.. DIP("%s %s,%s\n", opname,
-//.. nameXMMReg(eregOfRM(rm)),
-//.. nameXMMReg(gregOfRM(rm)) );
-//.. return delta+1;
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta, dis_buf );
-//.. assign( oldG1,=20
-//.. binop( Iop_Set128lo32,
-//.. mkexpr(oldG0),
-//.. loadLE(Ity_I32, mkexpr(addr)) ));
-//.. putXMMReg( gregOfRM(rm), unop(op, mkexpr(oldG1)) );
-//.. DIP("%s %s,%s\n", opname,
-//.. dis_buf,
-//.. nameXMMReg(gregOfRM(rm)) );
-//.. return delta+alen;
-//.. }
-//.. }
+/* All lanes unary SSE operation, G =3D op(E). */
=20
+static ULong dis_SSE_E_to_G_unary_all (=20
+ Prefix pfx, ULong delta,=20
+ HChar* opname, IROp op
+ )
+{
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm =3D getUChar(delta);
+ if (epartIsReg(rm)) {
+ putXMMReg( gregOfRexRM(pfx,rm),=20
+ unop(op, getXMMReg(eregOfRexRM(pfx,rm))) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,rm),=20
+ unop(op, loadLE(Ity_V128, mkexpr(addr))) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+alen;
+ }
+}
=20
+
+/* Lowest 32-bit lane only unary SSE operation, G =3D op(E). */
+
+static ULong dis_SSE_E_to_G_unary_lo32 (=20
+ Prefix pfx, ULong delta,=20
+ HChar* opname, IROp op
+ )
+{
+ /* First we need to get the old G value and patch the low 32 bits
+ of the E operand into it. Then apply op and write back to G. */
+ HChar dis_buf[50];
+ Int alen;
+ IRTemp addr;
+ UChar rm =3D getUChar(delta);
+ IRTemp oldG0 =3D newTemp(Ity_V128);
+ IRTemp oldG1 =3D newTemp(Ity_V128);
+
+ assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
+
+ if (epartIsReg(rm)) {
+ assign( oldG1,=20
+ binop( Iop_SetV128lo32,
+ mkexpr(oldG0),
+ getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
+ putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ nameXMMReg(eregOfRexRM(pfx,rm)),
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta, dis_buf, 0 );
+ assign( oldG1,=20
+ binop( Iop_SetV128lo32,
+ mkexpr(oldG0),
+ loadLE(Ity_I32, mkexpr(addr)) ));
+ putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
+ DIP("%s %s,%s\n", opname,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,rm)) );
+ return delta+alen;
+ }
+}
+
+
/* Lowest 64-bit lane only unary SSE operation, G =3D op(E). */
=20
static ULong dis_SSE_E_to_G_unary_lo64 (=20
@@ -7796,85 +7812,85 @@
unop(Iop_32Uto64,sseround) ) );
}
=20
-//.. /* Break a 128-bit value up into four 32-bit ints. */
-//..=20
-//.. static void breakup128to32s ( IRTemp t128,
-//.. /*OUTs*/
-//.. IRTemp* t3, IRTemp* t2,
-//.. IRTemp* t1, IRTemp* t0 )
-//.. {
-//.. IRTemp hi64 =3D newTemp(Ity_I64);
-//.. IRTemp lo64 =3D newTemp(Ity_I64);
-//.. assign( hi64, unop(Iop_128HIto64, mkexpr(t128)) );
-//.. assign( lo64, unop(Iop_128to64, mkexpr(t128)) );
-//..=20
-//.. vassert(t0 && *t0 =3D=3D IRTemp_INVALID);
-//.. vassert(t1 && *t1 =3D=3D IRTemp_INVALID);
-//.. vassert(t2 && *t2 =3D=3D IRTemp_INVALID);
-//.. vassert(t3 && *t3 =3D=3D IRTemp_INVALID);
-//..=20
-//.. *t0 =3D newTemp(Ity_I32);
-//.. *t1 =3D newTemp(Ity_I32);
-//.. *t2 =3D newTemp(Ity_I32);
-//.. *t3 =3D newTemp(Ity_I32);
-//.. assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
-//.. assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
-//.. assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
-//.. assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
-//.. }
-//..=20
-//.. /* Construct a 128-bit value from four 32-bit ints. */
-//..=20
-//.. static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
-//.. IRTemp t1, IRTemp t0 )
-//.. {
-//.. return
-//.. binop( Iop_64HLto128,
-//.. binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
-//.. binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
-//.. );
-//.. }
-//..=20
-//.. /* Break a 64-bit value up into four 16-bit ints. */
-//..=20
-//.. static void breakup64to16s ( IRTemp t64,
-//.. /*OUTs*/
-//.. IRTemp* t3, IRTemp* t2,
-//.. IRTemp* t1, IRTemp* t0 )
-//.. {
-//.. IRTemp hi32 =3D newTemp(Ity_I32);
-//.. IRTemp lo32 =3D newTemp(Ity_I32);
-//.. assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
-//.. assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
-//..=20
-//.. vassert(t0 && *t0 =3D=3D IRTemp_INVALID);
-//.. vassert(t1 && *t1 =3D=3D IRTemp_INVALID);
-//.. vassert(t2 && *t2 =3D=3D IRTemp_INVALID);
-//.. vassert(t3 && *t3 =3D=3D IRTemp_INVALID);
-//..=20
-//.. *t0 =3D newTemp(Ity_I16);
-//.. *t1 =3D newTemp(Ity_I16);
-//.. *t2 =3D newTemp(Ity_I16);
-//.. *t3 =3D newTemp(Ity_I16);
-//.. assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
-//.. assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
-//.. assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
-//.. assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
-//.. }
-//..=20
-//.. /* Construct a 64-bit value from four 16-bit ints. */
-//..=20
-//.. static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
-//.. IRTemp t1, IRTemp t0 )
-//.. {
-//.. return
-//.. binop( Iop_32HLto64,
-//.. binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
-//.. binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
-//.. );
-//.. }
+/* Break a 128-bit value up into four 32-bit ints. */
=20
+static void breakup128to32s ( IRTemp t128,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi64 =3D newTemp(Ity_I64);
+ IRTemp lo64 =3D newTemp(Ity_I64);
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
=20
+ vassert(t0 && *t0 =3D=3D IRTemp_INVALID);
+ vassert(t1 && *t1 =3D=3D IRTemp_INVALID);
+ vassert(t2 && *t2 =3D=3D IRTemp_INVALID);
+ vassert(t3 && *t3 =3D=3D IRTemp_INVALID);
+
+ *t0 =3D newTemp(Ity_I32);
+ *t1 =3D newTemp(Ity_I32);
+ *t2 =3D newTemp(Ity_I32);
+ *t3 =3D newTemp(Ity_I32);
+ assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
+ assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
+ assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
+ assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
+}
+
+/* Construct a 128-bit value from four 32-bit ints. */
+
+static IRExpr* mk128from32s ( IRTemp t3, IRTemp t2,
+ IRTemp t1, IRTemp t0 )
+{
+ return
+ binop( Iop_64HLtoV128,
+ binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
+ binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
+ );
+}
+
+/* Break a 64-bit value up into four 16-bit ints. */
+
+static void breakup64to16s ( IRTemp t64,
+ /*OUTs*/
+ IRTemp* t3, IRTemp* t2,
+ IRTemp* t1, IRTemp* t0 )
+{
+ IRTemp hi32 =3D newTemp(Ity_I32);
+ IRTemp lo32 =3D newTemp(Ity_I32);
+ assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
+ assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
+
+ vassert(t0 && *t0 =3D=3D IRTemp_INVALID);
+ vassert(t1 && *t1 =3D=3D IRTemp_INVALID);
+ vassert(t2 && *t2 =3D=3D IRTemp_INVALID);
+ vassert(t3 && *t3 =3D=3D IRTemp_INVALID);
+
+ *t0 =3D newTemp(Ity_I16);
+ *t1 =3D newTemp(Ity_I16);
+ *t2 =3D newTemp(Ity_I16);
+ *t3 =3D newTemp(Ity_I16);
+ assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
+ assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
+ assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
+ assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
+}
+
+/* Construct a 64-bit value from four 16-bit ints. */
+
+static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
+ IRTemp t1, IRTemp t0 )
+{
+ return
+ binop( Iop_32HLto64,
+ binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
+ binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
+ );
+}
+
+
/*------------------------------------------------------------*/
/*--- Disassemble a single instruction ---*/
/*------------------------------------------------------------*/
@@ -7895,7 +7911,7 @@
/*OUT*/ Addr64* whereNext )
{
IRType ty;
- IRTemp addr, t0, t1, t2, t3, t4 /*, t5, t6 */;
+ IRTemp addr, t0, t1, t2, t3, t4, t5, t6;
Int alen;
UChar opc, modrm, /*abyte,*/ pre;
Long d64;
@@ -7923,7 +7939,7 @@
vassert(guest_rip_next_assumed =3D=3D 0);
vassert(guest_rip_next_mustcheck =3D=3D False);
=20
- addr =3D t0 =3D t1 =3D t2 =3D t3 =3D t4 =3D /* t5 =3D t6 =3D */ IRTem=
p_INVALID;=20
+ addr =3D t0 =3D t1 =3D t2 =3D t3 =3D t4 =3D t5 =3D t6 =3D IRTemp_INVA=
LID;=20
=20
DIP("\t0x%llx: ", guest_rip_bbstart+delta);
=20
@@ -8337,10 +8353,10 @@
=20
/* 0F 2D =3D CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
I32 in mmx, according to prevailing SSE rounding mode */
-//.. /* 0F 2C =3D CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to=
2 x
-//.. I32 in mmx, rounding towards zero */
+ /* 0F 2C =3D CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
+ I32 in mmx, rounding towards zero */
if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
- && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x2D /* || insn[1] =3D=3D=
0x2C */)) {
+ && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x2D || insn[1] =3D=3D =
0x2C)) {
IRTemp dst64 =3D newTemp(Ity_I64);
IRTemp rmode =3D newTemp(Ity_I32);
IRTemp f32lo =3D newTemp(Ity_F32);
@@ -8656,77 +8672,80 @@
/* else fall through */
}
=20
-//.. /* 0F 50 =3D MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
-//.. to 4 lowest bits of ireg(G) */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x50) {
-//.. modrm =3D getUChar(delta+2);
-//.. if (sz =3D=3D 4 && epartIsReg(modrm)) {
-//.. Int src;
-//.. t0 =3D newTemp(Ity_I32);
-//.. t1 =3D newTemp(Ity_I32);
-//.. t2 =3D newTemp(Ity_I32);
-//.. t3 =3D newTemp(Ity_I32);
-//.. delta +=3D 2+1;
-//.. src =3D eregOfRM(modrm);
-//.. assign( t0, binop( Iop_And32,
-//.. binop(Iop_Shr32, getXMMRegLane32(src,0)=
, mkU8(31)),
-//.. mkU32(1) ));
-//.. assign( t1, binop( Iop_And32,
-//.. binop(Iop_Shr32, getXMMRegLane32(src,1)=
, mkU8(30)),
-//.. mkU32(2) ));
-//.. assign( t2, binop( Iop_And32,
-//.. binop(Iop_Shr32, getXMMRegLane32(src,2)=
, mkU8(29)),
-//.. mkU32(4) ));
-//.. assign( t3, binop( Iop_And32,
-//.. binop(Iop_Shr32, getXMMRegLane32(src,3)=
, mkU8(28)),
-//.. mkU32(8) ));
-//.. putIReg(4, gregOfRM(modrm),
-//.. binop(Iop_Or32,
-//.. binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
-//.. binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
-//.. )
-//.. );
-//.. DIP("movmskps %s,%s\n", nameXMMReg(src),=20
-//.. nameIReg(4, gregOfRM(modrm)));
-//.. goto decode_success;
-//.. }
-//.. /* else fall through */
-//.. }
-//..=20
-//.. /* 0F 2B =3D MOVNTPS -- for us, just a plain SSE store. */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x2B) {
-//.. modrm =3D getUChar(delta+2);
-//.. if (!epartIsReg(modrm)) {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
-//.. DIP("movntps %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. delta +=3D 2+alen;
-//.. goto decode_success;
-//.. }
-//.. /* else fall through */
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F E7 =3D MOVNTQ -- for us, just a plain MMX store. Note, th=
e
-//.. Intel manual does not say anything about the usual business o=
f
-//.. the FP reg tags getting trashed whenever an MMX insn happens.
-//.. So we just leave them alone.=20
-//.. */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE7) {
-//.. modrm =3D getUChar(delta+2);
-//.. if (sz =3D=3D 4 && !epartIsReg(modrm)) {
-//.. /* do_MMX_preamble(); Intel docs don't specify this */
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
-//.. DIP("movntq %s,%s\n", dis_buf,
-//.. nameMMXReg(gregOfRM(modrm)));
-//.. delta +=3D 2+alen;
-//.. goto decode_success;
-//.. }
-//.. /* else fall through */
-//.. }
+ /* 0F 50 =3D MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
+ to 4 lowest bits of ireg(G) */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x50) {
+ modrm =3D getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ Int src;
+ t0 =3D newTemp(Ity_I32);
+ t1 =3D newTemp(Ity_I32);
+ t2 =3D newTemp(Ity_I32);
+ t3 =3D newTemp(Ity_I32);
+ delta +=3D 2+1;
+ src =3D eregOfRexRM(pfx,modrm);
+ assign( t0, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,0), mkU=
8(31)),
+ mkU32(1) ));
+ assign( t1, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU=
8(30)),
+ mkU32(2) ));
+ assign( t2, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,2), mkU=
8(29)),
+ mkU32(4) ));
+ assign( t3, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU=
8(28)),
+ mkU32(8) ));
+ putIReg32( gregOfRexRM(pfx,modrm),
+ binop(Iop_Or32,
+ binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
+ binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
+ )
+ );
+ DIP("movmskps %s,%s\n", nameXMMReg(src),=20
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ goto decode_success;
+ }
+ /* else fall through */
+ }
=20
+ /* 0F 2B =3D MOVNTPS -- for us, just a plain SSE store. */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x2B) {
+ modrm =3D getUChar(delta+2);
+ if (!epartIsReg(modrm)) {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
+ DIP("movntps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E7 =3D MOVNTQ -- for us, just a plain MMX store. Note, the
+ Intel manual does not say anything about the usual business of
+ the FP reg tags getting trashed whenever an MMX insn happens.
+ So we just leave them alone.=20
+ */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE7) {
+ modrm =3D getUChar(delta+2);
+ if (!epartIsReg(modrm)) {
+ /* do_MMX_preamble(); Intel docs don't specify this */
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
+ DIP("movntq %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ delta +=3D 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
+ }
+
/* F3 0F 10 =3D MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
(lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
if (haveF3no66noF2(pfx) && sz =3D=3D 4
@@ -8789,162 +8808,180 @@
goto decode_success;
}
=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F E0 =3D PAVGB -- 8x8 unsigned Packed Average, with rounding=
*/
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE0) {
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "pavgb", False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F E3 =3D PAVGW -- 16x4 unsigned Packed Average, with roundin=
g */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE3) {
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "pavgw", False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F C5 =3D PEXTRW -- extract 16-bit field from mmx(E) and put=20
-//.. zero-extend of it in ireg(G). */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC5) {
-//.. modrm =3D insn[2];
-//.. if (sz =3D=3D 4 && epartIsReg(modrm)) {
-//.. IRTemp sV =3D newTemp(Ity_I64);
-//.. t5 =3D newTemp(Ity_I16);
-//.. do_MMX_preamble();
-//.. assign(sV, getMMXReg(eregOfRM(modrm)));
-//.. breakup64to16s( sV, &t3, &t2, &t1, &t0 );
-//.. switch (insn[3] & 3) {
-//.. case 0: assign(t5, mkexpr(t0)); break;
-//.. case 1: assign(t5, mkexpr(t1)); break;
-//.. case 2: assign(t5, mkexpr(t2)); break;
-//.. case 3: assign(t5, mkexpr(t3)); break;
-//.. default: vassert(0);
-//.. }
-//.. putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)))=
;
-//.. DIP("pextrw $%d,%s,%s\n",
-//.. (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
-//.. nameIReg(4,gregOfRM(modrm)));
-//.. delta +=3D 4;
-//.. goto decode_success;
-//.. }=20
-//.. /* else fall through */
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F C4 =3D PINSRW -- get 16 bits from E(mem or low half ireg) =
and
-//.. put it into the specified lane of mmx(G). */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC4) {
-//.. /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
-//.. mmx reg. t4 is the new lane value. t5 is the original
-//.. mmx value. t6 is the new mmx value. */
-//.. Int lane;
-//.. t4 =3D newTemp(Ity_I16);
-//.. t5 =3D newTemp(Ity_I64);
-//.. t6 =3D newTemp(Ity_I64);
-//.. modrm =3D insn[2];
-//.. do_MMX_preamble();
-//..=20
-//.. assign(t5, getMMXReg(gregOfRM(modrm)));
-//.. breakup64to16s( t5, &t3, &t2, &t1, &t0 );
-//..=20
-//.. if (epartIsReg(modrm)) {
-//.. assign(t4, getIReg(2, eregOfRM(modrm)));
-//.. lane =3D insn[3];
-//.. delta +=3D 2+2;
-//.. DIP("pinsrw $%d,%s,%s\n", (Int)lane,=20
-//.. nameIReg(2,eregOfRM(modrm)),
-//.. nameMMXReg(gregOfRM(modrm)));
-//.. } else {
-//.. /* awaiting test case */
-//.. goto decode_failure;
-//.. }
-//..=20
-//.. switch (lane & 3) {
-//.. case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
-//.. case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
-//.. case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
-//.. case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
-//.. default: vassert(0);
-//.. }
-//.. putMMXReg(gregOfRM(modrm), mkexpr(t6));
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F EE =3D PMAXSW -- 16x4 signed max */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEE) {
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "pmaxsw", False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F DE =3D PMAXUB -- 8x8 unsigned max */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDE) {
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "pmaxub", False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F EA =3D PMINSW -- 16x4 signed min */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEA) {
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "pminsw", False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F DA =3D PMINUB -- 8x8 unsigned min */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDA) {
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "pminub", False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F D7 =3D PMOVMSKB -- extract sign bits from each of 8 lanes =
in
-//.. mmx(G), turn them into a byte, and put zero-extend of it in
-//.. ireg(G). */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD7) {
-//.. modrm =3D insn[2];
-//.. if (epartIsReg(modrm)) {
-//.. do_MMX_preamble();
-//.. t0 =3D newTemp(Ity_I64);
-//.. t1 =3D newTemp(Ity_I32);
-//.. assign(t0, getMMXReg(eregOfRM(modrm)));
-//.. assign(t1, mkIRExprCCall(
-//.. Ity_I32, 0/*regparms*/,=20
-//.. "x86g_calculate_mmx_pmovmskb",
-//.. &x86g_calculate_mmx_pmovmskb,
-//.. mkIRExprVec_1(mkexpr(t0))));
-//.. putIReg(4, gregOfRM(modrm), mkexpr(t1));
-//.. DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
-//.. nameIReg(4,gregOfRM(modrm)));
-//.. delta +=3D 3;
-//.. goto decode_success;
-//.. }=20
-//.. /* else fall through */
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F E4 =3D PMULUH -- 16x4 hi-half of unsigned widening multipl=
y */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE4) {
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "pmuluh", False );
-//.. goto decode_success;
-//.. }
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E0 =3D PAVGB -- 8x8 unsigned Packed Average, with rounding */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE0) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "pavgb", False );
+ goto decode_success;
+ }
=20
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E3 =3D PAVGW -- 16x4 unsigned Packed Average, with rounding */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE3) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "pavgw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F C5 =3D PEXTRW -- extract 16-bit field from mmx(E) and put=20
+ zero-extend of it in ireg(G). */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC5) {
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ IRTemp sV =3D newTemp(Ity_I64);
+ t5 =3D newTemp(Ity_I16);
+ do_MMX_preamble();
+ assign(sV, getMMXReg(eregLO3ofRM(modrm)));
+ breakup64to16s( sV, &t3, &t2, &t1, &t0 );
+ switch (insn[3] & 3) {
+ case 0: assign(t5, mkexpr(t0)); break;
+ case 1: assign(t5, mkexpr(t1)); break;
+ case 2: assign(t5, mkexpr(t2)); break;
+ case 3: assign(t5, mkexpr(t3)); break;
+ default: vassert(0);
+ }
+ putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5))=
);
+ DIP("pextrw $%d,%s,%s\n",
+ (Int)insn[3], nameMMXReg(eregLO3ofRM(modrm)),
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ delta +=3D 4;
+ goto decode_success;
+ }=20
+ /* else fall through */
+ /* note, for anyone filling in the mem case: this insn has one
+ byte after the amode and therefore you must pass 1 as the
+ last arg to disAMode */
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F C4 =3D PINSRW -- get 16 bits from E(mem or low 16 bits of ireg) and
+ put it into the specified lane of mmx(G). */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC4) {
+ /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
+ mmx reg. t4 is the new lane value. t5 is the original
+ mmx value. t6 is the new mmx value. */
+ Int lane;
+ t4 =3D newTemp(Ity_I16);
+ t5 =3D newTemp(Ity_I64);
+ t6 =3D newTemp(Ity_I64);
+ modrm =3D insn[2];
+ do_MMX_preamble();
+
+ assign(t5, getMMXReg(gregLO3ofRM(modrm)));
+ breakup64to16s( t5, &t3, &t2, &t1, &t0 );
+
+ if (epartIsReg(modrm)) {
+ assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
+ delta +=3D 3+1;
+ lane =3D insn[3+1-1];
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,=20
+ nameIReg16(eregOfRexRM(pfx,modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 1 );
+ delta +=3D 3+alen;
+ lane =3D insn[3+alen-1];
+ assign(t4, loadLE(Ity_I16, mkexpr(addr)));
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ switch (lane & 3) {
+ case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
+ case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
+ case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
+ case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
+ default: vassert(0);
+ }
+ putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F EE =3D PMAXSW -- 16x4 signed max */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEE) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "pmaxsw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F DE =3D PMAXUB -- 8x8 unsigned max */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDE) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "pmaxub", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F EA =3D PMINSW -- 16x4 signed min */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEA) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "pminsw", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F DA =3D PMINUB -- 8x8 unsigned min */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xDA) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "pminub", False );
+ goto decode_success;
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F D7 =3D PMOVMSKB -- extract sign bits from each of 8 lanes in
+ mmx(E), turn them into a byte, and put zero-extend of it in
+ ireg(G). */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD7) {
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ t0 =3D newTemp(Ity_I64);
+ t1 =3D newTemp(Ity_I64);
+ assign(t0, getMMXReg(eregLO3ofRM(modrm)));
+ assign(t1, mkIRExprCCall(
+ Ity_I64, 0/*regparms*/,=20
+ "amd64g_calculate_mmx_pmovmskb",
+ &amd64g_calculate_mmx_pmovmskb,
+ mkIRExprVec_1(mkexpr(t0))));
+ putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_64to32,mkexpr(t1)));
+ DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ delta +=3D 3;
+ goto decode_success;
+ }=20
+ /* else fall through */
+ }
+
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E4 =3D PMULHUW -- 16x4 hi-half of unsigned widening multiply */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE4) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "pmuluh", False );
+ goto decode_success;
+ }
+
/* 0F 18 /0 =3D PREFETCHNTA -- prefetch into caches, */
/* 0F 18 /1 =3D PREFETCH0 -- with various different hints */
/* 0F 18 /2 =3D PREFETCH1 */
@@ -8973,89 +9010,88 @@
goto decode_success;
}
=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F F6 =3D PSADBW -- sum of 8Ux8 absolute differences */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF6) {
-//.. vassert(sz =3D=3D 4);
-//.. do_MMX_preamble();
-//.. delta =3D dis_MMXop_regmem_to_reg (=20
-//.. sorb, delta+2, insn[1], "psadbw", False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
-//.. /* 0F 70 =3D PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mm=
x) */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x70) {
-//.. Int order;
-//.. IRTemp sV, dV, s3, s2, s1, s0;
-//.. s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
-//.. sV =3D newTemp(Ity_I64);
-//.. dV =3D newTemp(Ity_I64);
-//.. do_MMX_preamble();
-//.. modrm =3D insn[2];
-//.. if (epartIsReg(modrm)) {
-//.. assign( sV, getMMXReg(eregOfRM(modrm)) );
-//.. order =3D (Int)insn[3];
-//.. delta +=3D 2+2;
-//.. DIP("pshufw $%d,%s,%s\n", order,=20
-//.. nameMMXReg(eregOfRM(modrm)),
-//.. nameMMXReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
-//.. order =3D (Int)insn[2+alen];
-//.. delta +=3D 3+alen;
-//.. DIP("pshufw $%d,%s,%s\n", order,=20
-//.. dis_buf,
-//.. nameMMXReg(gregOfRM(modrm)));
-//.. }
-//.. breakup64to16s( sV, &s3, &s2, &s1, &s0 );
-//..=20
-#if 0 /* stop gcc multi-line comment warning */
-/.. # define SEL(n) \
-/.. ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D2 ? =
s2 : s3)))
-#endif /* stop gcc multi-line comment warning */
-//.. assign(dV,
-//.. mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
-//.. SEL((order>>2)&3), SEL((order>>0)&3) )
-//.. );
-//.. putMMXReg(gregOfRM(modrm), mkexpr(dV));
-//.. # undef SEL
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 0F 53 =3D RCPPS -- approx reciprocal 32Fx4 from R/M to R */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x53) {
-//.. vassert(sz =3D=3D 4);
-//.. delta =3D dis_SSE_E_to_G_unary_all( sorb, delta+2,=20
-//.. "rcpps", Iop_Recip32Fx4 );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* F3 0F 53 =3D RCPSS -- approx reciprocal 32F0x4 from R/M to R =
*/
-//.. if (insn[0] =3D=3D 0xF3 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0x53) {
-//.. vassert(sz =3D=3D 4);
-//.. delta =3D dis_SSE_E_to_G_unary_lo32( sorb, delta+3,=20
-//.. "rcpss", Iop_Recip32F0x4 )=
;
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 0F 52 =3D RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to=
R */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x52) {
-//.. vassert(sz =3D=3D 4);
-//.. delta =3D dis_SSE_E_to_G_unary_all( sorb, delta+2,=20
-//.. "rsqrtps", Iop_RSqrt32Fx4 )=
;
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* F3 0F 52 =3D RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/=
M to R */
-//.. if (insn[0] =3D=3D 0xF3 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0x52) {
-//.. vassert(sz =3D=3D 4);
-//.. delta =3D dis_SSE_E_to_G_unary_lo32( sorb, delta+3,=20
-//.. "rsqrtss", Iop_RSqrt32F0x4=
);
-//.. goto decode_success;
-//.. }
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F F6 =3D PSADBW -- sum of 8Ux8 absolute differences */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF6) {
+ do_MMX_preamble();
+ delta =3D dis_MMXop_regmem_to_reg (=20
+ pfx, delta+2, insn[1], "psadbw", False );
+ goto decode_success;
+ }
=20
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F 70 =3D PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x70) {
+ Int order;
+ IRTemp sV, dV, s3, s2, s1, s0;
+ s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
+ sV =3D newTemp(Ity_I64);
+ dV =3D newTemp(Ity_I64);
+ do_MMX_preamble();
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ order =3D (Int)insn[3];
+ delta +=3D 2+2;
+ DIP("pshufw $%d,%s,%s\n", order,=20
+ nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf,
+ 1/*extra byte after amode*/ );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ order =3D (Int)insn[2+alen];
+ delta +=3D 3+alen;
+ DIP("pshufw $%d,%s,%s\n", order,=20
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+ breakup64to16s( sV, &s3, &s2, &s1, &s0 );
+# define SEL(n) \
+ ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D2 ? s2 :=
s3)))
+ assign(dV,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
+# undef SEL
+ goto decode_success;
+ }
+
+ /* 0F 53 =3D RCPPS -- approx reciprocal 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x53) {
+ delta =3D dis_SSE_E_to_G_unary_all( pfx, delta+2,=20
+ "rcpps", Iop_Recip32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 53 =3D RCPSS -- approx reciprocal 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x53) {
+ delta =3D dis_SSE_E_to_G_unary_lo32( pfx, delta+2,=20
+ "rcpss", Iop_Recip32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 52 =3D RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x52) {
+ delta =3D dis_SSE_E_to_G_unary_all( pfx, delta+2,=20
+ "rsqrtps", Iop_RSqrt32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 52 =3D RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to =
R */
+ if (haveF3no66noF2(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x52) {
+ delta =3D dis_SSE_E_to_G_unary_lo32( pfx, delta+2,=20
+ "rsqrtss", Iop_RSqrt32F0x4 );
+ goto decode_success;
+ }
+
/* 0F AE /7 =3D SFENCE -- flush pending operations to memory */
if (haveNo66noF2noF3(pfx)=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xAE
@@ -9069,67 +9105,70 @@
goto decode_success;
}
=20
-//.. /* 0F C6 /r ib =3D SHUFPS -- shuffle packed F32s */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC6) {
-//.. Int select;
-//.. IRTemp sV, dV;
-//.. IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
-//.. sV =3D newTemp(Ity_V128);
-//.. dV =3D newTemp(Ity_V128);
-//.. s3 =3D s2 =3D s1 =3D s0 =3D d3 =3D d2 =3D d1 =3D d0 =3D IRTem=
p_INVALID;
-//.. modrm =3D insn[2];
-//.. assign( dV, getXMMReg(gregOfRM(modrm)) );
-//..=20
-//.. if (epartIsReg(modrm)) {
-//.. assign( sV, getXMMReg(eregOfRM(modrm)) );
-//.. select =3D (Int)insn[3];
-//.. delta +=3D 2+2;
-//.. DIP("shufps $%d,%s,%s\n", select,=20
-//.. nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. select =3D (Int)insn[2+alen];
-//.. delta +=3D 3+alen;
-//.. DIP("shufps $%d,%s,%s\n", select,=20
-//.. dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. }
-//..=20
-//.. breakup128to32s( dV, &d3, &d2, &d1, &d0 );
-//.. breakup128to32s( sV, &s3, &s2, &s1, &s0 );
-//..=20
-//.. # define SELD(n) ((n)=3D=3D0 ? d0 : ((n)=3D=3D1 ? d1 : ((n)=3D=3D=
2 ? d2 : d3)))
-//.. # define SELS(n) ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D=
2 ? s2 : s3)))
-//..=20
-//.. putXMMReg(
-//.. gregOfRM(modrm),=20
-//.. mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),=20
-//.. SELD((select>>2)&3), SELD((select>>0)&3) )
-//.. );
-//..=20
-//.. # undef SELD
-//.. # undef SELS
-//..=20
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 0F 51 =3D SQRTPS -- approx sqrt 32Fx4 from R/M to R */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x51) {
-//.. delta =3D dis_SSE_E_to_G_unary_all( sorb, delta+2,=20
-//.. "sqrtps", Iop_Sqrt32Fx4 );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* F3 0F 51 =3D SQRTSS -- approx sqrt 32F0x4 from R/M to R */
-//.. if (insn[0] =3D=3D 0xF3 && insn[1] =3D=3D 0x0F && insn[2] =3D=3D=
0x51) {
-//.. vassert(sz =3D=3D 4);
-//.. delta =3D dis_SSE_E_to_G_unary_lo32( sorb, delta+3,=20
-//.. "sqrtss", Iop_Sqrt32F0x4 )=
;
-//.. goto decode_success;
-//.. }
+ /* 0F C6 /r ib =3D SHUFPS -- shuffle packed F32s */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC6) {
+ Int select;
+ IRTemp sV, dV;
+ IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+ sV =3D newTemp(Ity_V128);
+ dV =3D newTemp(Ity_V128);
+ s3 =3D s2 =3D s1 =3D s0 =3D d3 =3D d2 =3D d1 =3D d0 =3D IRTemp_INV=
ALID;
+ modrm =3D insn[2];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
=20
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ select =3D (Int)insn[3];
+ delta +=3D 2+2;
+ DIP("shufps $%d,%s,%s\n", select,=20
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf,=20
+ 1/*byte at end of insn*/ );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ select =3D (Int)insn[2+alen];
+ delta +=3D 3+alen;
+ DIP("shufps $%d,%s,%s\n", select,=20
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+# define SELD(n) ((n)=3D=3D0 ? d0 : ((n)=3D=3D1 ? d1 : ((n)=3D=3D2 ?=
d2 : d3)))
+# define SELS(n) ((n)=3D=3D0 ? s0 : ((n)=3D=3D1 ? s1 : ((n)=3D=3D2 ?=
s2 : s3)))
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),=20
+ mk128from32s( SELS((select>>6)&3), SELS((select>>4)&3),=20
+ SELD((select>>2)&3), SELD((select>>0)&3) )
+ );
+
+# undef SELD
+# undef SELS
+
+ goto decode_success;
+ }
+
+ /* 0F 51 =3D SQRTPS -- sqrt 32Fx4 from R/M to R */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x51) {
+ delta =3D dis_SSE_E_to_G_unary_all( pfx, delta+2,=20
+ "sqrtps", Iop_Sqrt32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 51 =3D SQRTSS -- sqrt 32F0x4 from R/M to R */
+ if (haveF3no66noF2(pfx) && sz =3D=3D 4
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x51) {
+ delta =3D dis_SSE_E_to_G_unary_lo32( pfx, delta+2,=20
+ "sqrtss", Iop_Sqrt32F0x4 );
+ goto decode_success;
+ }
+
/* 0F AE /3 =3D STMXCSR m32 -- store %mxcsr */
if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xAE
&& haveNo66noF2noF3(pfx)
@@ -9171,46 +9210,47 @@
goto decode_success;
}
=20
-//.. /* 0F 15 =3D UNPCKHPS -- unpack and interleave high part F32s */
-//.. /* 0F 14 =3D UNPCKLPS -- unpack and interleave low part F32s */
-//.. /* These just appear to be special cases of SHUFPS */
-//.. if (sz =3D=3D 4 && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x15 |=
| insn[1] =3D=3D 0x14)) {
-//.. IRTemp sV, dV;
-//.. IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
-//.. Bool hi =3D insn[1] =3D=3D 0x15;
-//.. sV =3D newTemp(Ity_V128);
-//.. dV =3D newTemp(Ity_V128);
-//.. s3 =3D s2 =3D s1 =3D s0 =3D d3 =3D d2 =3D d1 =3D d0 =3D IRTem=
p_INVALID;
-//.. modrm =3D insn[2];
-//.. assign( dV, getXMMReg(gregOfRM(modrm)) );
-//..=20
-//.. if (epartIsReg(modrm)) {
-//.. assign( sV, getXMMReg(eregOfRM(modrm)) );
-//.. delta +=3D 2+1;
-//.. DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
-//.. nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. delta +=3D 2+alen;
-//.. DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
-//.. dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. }
-//..=20
-//.. breakup128to32s( dV, &d3, &d2, &d1, &d0 );
-//.. breakup128to32s( sV, &s3, &s2, &s1, &s0 );
-//..=20
-//.. if (hi) {
-//.. putXMMReg( gregOfRM(modrm), mk128from32s( s3, d3, s2, d2 )=
);
-//.. } else {
-//.. putXMMReg( gregOfRM(modrm), mk128from32s( s1, d1, s0, d0 )=
);
-//.. }
-//..=20
-//.. goto decode_success;
-//.. }
+ /* 0F 15 =3D UNPCKHPS -- unpack and interleave high part F32s */
+ /* 0F 14 =3D UNPCKLPS -- unpack and interleave low part F32s */
+ /* These just appear to be special cases of SHUFPS */
+ if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x15 || insn[1] =3D=3D =
0x14)) {
+ IRTemp sV, dV;
+ IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+ Bool hi =3D insn[1] =3D=3D 0x15;
+ sV =3D newTemp(Ity_V128);
+ dV =3D newTemp(Ity_V128);
+ s3 =3D s2 =3D s1 =3D s0 =3D d3 =3D d2 =3D d1 =3D d0 =3D IRTemp_INV=
ALID;
+ modrm =3D insn[2];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
=20
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta +=3D 2+1;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta +=3D 2+alen;
+ DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+ if (hi) {
+ putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s3, d3, s2, d2=
) );
+ } else {
+ putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( s1, d1, s0, d0=
) );
+ }
+
+ goto decode_success;
+ }
+
/* 0F 57 =3D XORPS -- G =3D G and E */
if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x57) {
Modified: trunk/priv/host-amd64/hdefs.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/hdefs.c 2005-05-10 02:50:05 UTC (rev 1176)
+++ trunk/priv/host-amd64/hdefs.c 2005-05-10 20:08:34 UTC (rev 1177)
@@ -596,8 +596,8 @@
case Asse_CMPLTF: return "cmpFlt";
case Asse_CMPLEF: return "cmpFle";
case Asse_CMPUNF: return "cmpFun";
-//.. case Xsse_RCPF: return "rcp";
-//.. case Xsse_RSQRTF: return "rsqrt";
+ case Asse_RCPF: return "rcp";
+ case Asse_RSQRTF: return "rsqrt";
case Asse_SQRTF: return "sqrt";
case Asse_AND: return "and";
case Asse_OR: return "or";
@@ -3198,9 +3198,9 @@
case Asse_MAXF: *p++ =3D 0x5F; break;
case Asse_MINF: *p++ =3D 0x5D; break;
case Asse_MULF: *p++ =3D 0x59; break;
- //case Asse_RCPF: *p++ =3D 0x53; break;
- //case Asse_RSQRTF: *p++ =3D 0x52; break;
- //case Asse_SQRTF: *p++ =3D 0x51; break;
+ case Asse_RCPF: *p++ =3D 0x53; break;
+ case Asse_RSQRTF: *p++ =3D 0x52; break;
+ case Asse_SQRTF: *p++ =3D 0x51; break;
case Asse_SUBF: *p++ =3D 0x5C; break;
case Asse_CMPEQF: *p++ =3D 0xC2; xtra =3D 0x100; break;
case Asse_CMPLTF: *p++ =3D 0xC2; xtra =3D 0x101; break;
@@ -3254,9 +3254,9 @@
case Asse_MAXF: *p++ =3D 0x5F; break;
case Asse_MINF: *p++ =3D 0x5D; break;
case Asse_MULF: *p++ =3D 0x59; break;
-//.. case Xsse_RCPF: *p++ =3D 0x53; break;
-//.. case Xsse_RSQRTF: *p++ =3D 0x52; break;
-//.. case Xsse_SQRTF: *p++ =3D 0x51; break;
+ case Asse_RCPF: *p++ =3D 0x53; break;
+ case Asse_RSQRTF: *p++ =3D 0x52; break;
+ case Asse_SQRTF: *p++ =3D 0x51; break;
case Asse_SUBF: *p++ =3D 0x5C; break;
case Asse_CMPEQF: *p++ =3D 0xC2; xtra =3D 0x100; break;
case Asse_CMPLTF: *p++ =3D 0xC2; xtra =3D 0x101; break;
Modified: trunk/priv/host-amd64/isel.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/isel.c 2005-05-10 02:50:05 UTC (rev 1176)
+++ trunk/priv/host-amd64/isel.c 2005-05-10 20:08:34 UTC (rev 1177)
@@ -933,12 +933,12 @@
fn =3D (HWord)h_generic_calc_Add16x4; break;
case Iop_Add32x2:
fn =3D (HWord)h_generic_calc_Add32x2; break;
-//..=20
-//.. case Iop_Avg8Ux8:
-//.. fn =3D (HWord)h_generic_calc_Avg8Ux8; break;
-//.. case Iop_Avg16Ux4:
-//.. fn =3D (HWord)h_generic_calc_Avg16Ux4; break;
=20
+ case Iop_Avg8Ux8:
+ fn =3D (HWord)h_generic_calc_Avg8Ux8; break;
+ case Iop_Avg16Ux4:
+ fn =3D (HWord)h_generic_calc_Avg16Ux4; break;
+
case Iop_CmpEQ8x8:
fn =3D (HWord)h_generic_calc_CmpEQ8x8; break;
case Iop_CmpEQ16x4:
@@ -966,22 +966,22 @@
case Iop_InterleaveLO32x2:
fn =3D (HWord)h_generic_calc_InterleaveLO32x2; break;
=20
-//.. case Iop_Max8Ux8:
-//.. fn =3D (HWord)h_generic_calc_Max8Ux8; break;
-//.. case Iop_Max16Sx4:
-//.. fn =3D (HWord)h_generic_calc_Max16Sx4; break;
-//.. case Iop_Min8Ux8:
-//.. fn =3D (HWord)h_generic_calc_Min8Ux8; break;
-//.. case Iop_Min16Sx4:
-//.. fn =3D (HWord)h_generic_calc_Min16Sx4; break;
+ case Iop_Max8Ux8:
+ fn =3D (HWord)h_generic_calc_Max8Ux8; break;
+ case Iop_Max16Sx4:
+ fn =3D (HWord)h_generic_calc_Max16Sx4; break;
+ case Iop_Min8Ux8:
+ fn =3D (HWord)h_generic_calc_Min8Ux8; break;
+ case Iop_Min16Sx4:
+ fn =3D (HWord)h_generic_calc_Min16Sx4; break;
=20
case Iop_Mul16x4:
fn =3D (HWord)h_generic_calc_Mul16x4; break;
case Iop_MulHi16Sx4:
fn =3D (HWord)h_generic_calc_MulHi16Sx4; break;
-//.. case Iop_MulHi16Ux4:
-//.. fn =3D (HWord)h_generic_calc_MulHi16Ux4; break;
-//..=20
+ case Iop_MulHi16Ux4:
+ fn =3D (HWord)h_generic_calc_MulHi16Ux4; break;
+
case Iop_QAdd8Sx8:
fn =3D (HWord)h_generic_calc_QAdd8Sx8; break;
case Iop_QAdd16Sx4:
@@ -3178,18 +3178,18 @@
//.. addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
//.. return dst;
//.. }
-//..=20
-//.. case Iop_Recip32Fx4: op =3D Xsse_RCPF; goto do_32Fx4_unary;
-//.. case Iop_RSqrt32Fx4: op =3D Xsse_RSQRTF; goto do_32Fx4_unary;
-//.. case Iop_Sqrt32Fx4: op =3D Xsse_SQRTF; goto do_32Fx4_unary;
-//.. do_32Fx4_unary:
-/...
[truncated message content] |