|
From: <sv...@va...> - 2005-05-10 22:42:58
|
Author: sewardj
Date: 2005-05-10 23:42:54 +0100 (Tue, 10 May 2005)
New Revision: 1178
Modified:
trunk/priv/guest-amd64/toIR.c
trunk/priv/host-amd64/hdefs.c
trunk/priv/host-amd64/hdefs.h
trunk/priv/host-amd64/isel.c
Log:
Enough SSE2 instructions to sink a small ship. And that's not even
half of them.
Modified: trunk/priv/guest-amd64/toIR.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/toIR.c 2005-05-10 20:08:34 UTC (rev 1177)
+++ trunk/priv/guest-amd64/toIR.c 2005-05-10 22:42:54 UTC (rev 1178)
@@ -1419,17 +1419,17 @@
}
}
=20
-//.. /* Lanes of vector registers are always numbered from zero being th=
e
-//.. least significant lane (rightmost in the register). */
-//..=20
-//.. static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
-//.. {
-//.. /* Correct for little-endian host only. */
-//.. vassert(!host_is_bigendian);
-//.. vassert(laneno >=3D 0 && laneno < 8);
-//.. return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
-//.. }
+/* Lanes of vector registers are always numbered from zero being the
+ least significant lane (rightmost in the register). */
=20
+static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
+{
+ /* Correct for little-endian host only. */
+ vassert(!host_is_bigendian);
+ vassert(laneno >=3D 0 && laneno < 8);
+ return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
+}
+
static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
{
/* Correct for little-endian host only. */
@@ -1512,11 +1512,11 @@
stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
}
=20
-//.. static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
-//.. {
-//.. vassert(typeOfIRExpr(irbb->tyenv,e) =3D=3D Ity_I16);
-//.. stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
-//.. }
+static void putXMMRegLane16 ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irbb->tyenv,e) =3D=3D Ity_I16);
+ stmt( IRStmt_Put( xmmGuestRegLane16offset(xmmreg,laneno), e ) );
+}
=20
static IRExpr* mkV128 ( UShort mask )
{
@@ -9301,11 +9301,12 @@
goto decode_success;
}
=20
-//.. /* 66 0F C2 =3D CMPPD -- 64Fx2 comparison from R/M to R */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC2) {
-//.. delta =3D dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 =
);
-//.. goto decode_success;
-//.. }
+ /* 66 0F C2 =3D CMPPD -- 64Fx2 comparison from R/M to R */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC2) {
+ delta =3D dis_SSEcmp_E_to_G( pfx, delta+2, "cmppd", True, 8 );
+ goto decode_success;
+ }
=20
/* F2 0F C2 =3D CMPSD -- 64F0x2 comparison from R/M to R */
if (haveF2no66noF3(pfx) && sz =3D=3D 4
@@ -10018,7 +10019,8 @@
/* 66 0F 6E =3D MOVD from ireg32/m32 to xmm lo 1/4, zeroing high 3/4 =
of xmm. */
/* or from ireg64/m64 to xmm lo 1/2, zeroing high 1/2 of=
xmm. */
if (have66noF2noF3(pfx) && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6E=
) {
- vassert(sz =3D=3D 4 || sz =3D=3D 8);
+ vassert(sz =3D=3D 2 || sz =3D=3D 8);
+ if (sz =3D=3D 2) sz =3D 4;
modrm =3D getUChar(delta+2);
if (epartIsReg(modrm)) {
delta +=3D 2+1;
@@ -10444,12 +10446,13 @@
goto decode_success;
}
=20
-//.. /* 66 0F 51 =3D SQRTPD -- approx sqrt 64Fx2 from R/M to R */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x51) {
-//.. delta =3D dis_SSE_E_to_G_unary_all( sorb, delta+2,=20
-//.. "sqrtpd", Iop_Sqrt64Fx2 );
-//.. goto decode_success;
-//.. }
+ /* 66 0F 51 =3D SQRTPD -- approx sqrt 64Fx2 from R/M to R */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x51) {
+ delta =3D dis_SSE_E_to_G_unary_all( pfx, delta+2,=20
+ "sqrtpd", Iop_Sqrt64Fx2 );
+ goto decode_success;
+ }
=20
/* F2 0F 51 =3D SQRTSD -- approx sqrt 64F0x2 from R/M to R */
if (haveF2no66noF3(pfx) && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x51=
) {
@@ -10522,46 +10525,52 @@
}
=20
/* 66 0F 57 =3D XORPD -- G =3D G xor E */
- if (have66noF2noF3(pfx) && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x57=
) {
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x57) {
delta =3D dis_SSE_E_to_G_all( pfx, delta+2, "xorpd", Iop_XorV128 )=
;
goto decode_success;
}
=20
-//.. /* 66 0F 6B =3D PACKSSDW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6B) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "packssdw", Iop_QNarrow32Sx4, True=
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 63 =3D PACKSSWB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x63) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "packsswb", Iop_QNarrow16Sx8, True=
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 67 =3D PACKUSWB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x67) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "packuswb", Iop_QNarrow16Ux8, True=
);
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F FC =3D PADDB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFC) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "paddb", Iop_Add8x16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F FE =3D PADDD */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFE) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "paddd", Iop_Add32x4, False );
-//.. goto decode_success;
-//.. }
+ /* 66 0F 6B =3D PACKSSDW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6B) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "packssdw", Iop_QNarrow32Sx4, True );
+ goto decode_success;
+ }
=20
+ /* 66 0F 63 =3D PACKSSWB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x63) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "packsswb", Iop_QNarrow16Sx8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 67 =3D PACKUSWB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x67) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "packuswb", Iop_QNarrow16Ux8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F FC =3D PADDB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFC) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "paddb", Iop_Add8x16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F FE =3D PADDD */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFE) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "paddd", Iop_Add32x4, False );
+ goto decode_success;
+ }
+
/* ***--- this is an MMX class insn introduced in SSE2 ---*** */
/* 0F D4 =3D PADDQ -- add 64x1 */
if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
@@ -10683,60 +10692,70 @@
//.. "pcmpgtw", Iop_CmpGT16Sx8, False )=
;
//.. goto decode_success;
//.. }
-//..=20
-//.. /* 66 0F C5 =3D PEXTRW -- extract 16-bit field from xmm(E) and p=
ut=20
-//.. zero-extend of it in ireg(G). */
-//.. if (insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC5) {
-//.. modrm =3D insn[2];
-//.. if (sz =3D=3D 2 && epartIsReg(modrm)) {
-//.. t5 =3D newTemp(Ity_V128);
-//.. t4 =3D newTemp(Ity_I16);
-//.. assign(t5, getXMMReg(eregOfRM(modrm)));
-//.. breakup128to32s( t5, &t3, &t2, &t1, &t0 );
-//.. switch (insn[3] & 7) {
-//.. case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); br=
eak;
-//.. case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); br=
eak;
-//.. case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); br=
eak;
-//.. case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); br=
eak;
-//.. case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); br=
eak;
-//.. case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); br=
eak;
-//.. case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); br=
eak;
-//.. case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); br=
eak;
-//.. default: vassert(0);
-//.. }
-//.. putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t4)))=
;
-//.. DIP("pextrw $%d,%s,%s\n",
-//.. (Int)insn[3], nameXMMReg(eregOfRM(modrm)),
-//.. nameIReg(4,gregOfRM(modrm)));
-//.. delta +=3D 4;
-//.. goto decode_success;
-//.. }=20
-//.. /* else fall through */
-//.. }
-//..=20
-//.. /* 66 0F C4 =3D PINSRW -- get 16 bits from E(mem or low half ire=
g) and
-//.. put it into the specified lane of xmm(G). */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC4) {
-//.. Int lane;
-//.. t4 =3D newTemp(Ity_I16);
-//.. modrm =3D insn[2];
-//..=20
-//.. if (epartIsReg(modrm)) {
-//.. assign(t4, getIReg(2, eregOfRM(modrm)));
-//.. lane =3D insn[3];
-//.. delta +=3D 2+2;
-//.. DIP("pinsrw $%d,%s,%s\n", (Int)lane,=20
-//.. nameIReg(2,eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. /* awaiting test case */
-//.. goto decode_failure;
-//.. }
-//..=20
-//.. putXMMRegLane16( gregOfRM(modrm), lane & 7, mkexpr(t4) );
-//.. goto decode_success;
-//.. }
-//..=20
+
+ /* 66 0F C5 =3D PEXTRW -- extract 16-bit field from xmm(E) and put=20
+ zero-extend of it in ireg(G). */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC5) {
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ t5 =3D newTemp(Ity_V128);
+ t4 =3D newTemp(Ity_I16);
+ assign(t5, getXMMReg(eregOfRexRM(pfx,modrm)));
+ breakup128to32s( t5, &t3, &t2, &t1, &t0 );
+ switch (insn[3] & 7) {
+ case 0: assign(t4, unop(Iop_32to16, mkexpr(t0))); break;
+ case 1: assign(t4, unop(Iop_32HIto16, mkexpr(t0))); break;
+ case 2: assign(t4, unop(Iop_32to16, mkexpr(t1))); break;
+ case 3: assign(t4, unop(Iop_32HIto16, mkexpr(t1))); break;
+ case 4: assign(t4, unop(Iop_32to16, mkexpr(t2))); break;
+ case 5: assign(t4, unop(Iop_32HIto16, mkexpr(t2))); break;
+ case 6: assign(t4, unop(Iop_32to16, mkexpr(t3))); break;
+ case 7: assign(t4, unop(Iop_32HIto16, mkexpr(t3))); break;
+ default: vassert(0);
+ }
+ putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t4))=
);
+ DIP("pextrw $%d,%s,%s\n",
+ (Int)insn[3], nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameIReg32(gregOfRexRM(pfx,modrm)));
+ delta +=3D 4;
+ goto decode_success;
+ }=20
+ /* else fall through */
+ /* note, if memory case is ever filled in, there is 1 byte after
+ amode */
+ }
+
+ /* 66 0F C4 =3D PINSRW -- get 16 bits from E(mem or low half ireg) an=
d
+ put it into the specified lane of xmm(G). */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xC4) {
+ Int lane;
+ t4 =3D newTemp(Ity_I16);
+ modrm =3D insn[2];
+
+ if (epartIsReg(modrm)) {
+ assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
+ delta +=3D 3+1;
+ lane =3D insn[3+1-1];
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,=20
+ nameIReg16(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf,=20
+ 1/*byte after the amode*/ );
+ delta +=3D 3+alen;
+ lane =3D insn[3+alen-1];
+ assign(t4, loadLE(Ity_I16, mkexpr(addr)));
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ putXMMRegLane16( gregOfRexRM(pfx,modrm), lane & 7, mkexpr(t4) );
+ goto decode_success;
+ }
+
//.. /* 66 0F EE =3D PMAXSW -- 16x8 signed max */
//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEE) {
//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
@@ -11036,64 +11055,69 @@
//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "pslld", Iop_Shl=
N32x4 );
//.. goto decode_success;
//.. }
-//..=20
-//.. /* 66 0F 73 /7 ib =3D PSLLDQ by immediate */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x73
-//.. && epartIsReg(insn[2])
-//.. && gregOfRM(insn[2]) =3D=3D 7) {
-//.. IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
-//.. Int imm =3D (Int)insn[3];
-//.. Int reg =3D eregOfRM(insn[2]);
-//.. DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
-//.. vassert(imm >=3D 0 && imm <=3D 255);
-//.. delta +=3D 4;
-//..=20
-//.. sV =3D newTemp(Ity_V128);
-//.. dV =3D newTemp(Ity_V128);
-//.. hi64 =3D newTemp(Ity_I64);
-//.. lo64 =3D newTemp(Ity_I64);
-//.. hi64r =3D newTemp(Ity_I64);
-//.. lo64r =3D newTemp(Ity_I64);
-//..=20
-//.. if (imm >=3D 16) {
-//.. vassert(0); /* awaiting test case */
-//.. putXMMReg(reg, mkV128(0x0000));
-//.. goto decode_success;
-//.. }
-//..=20
-//.. assign( sV, getXMMReg(reg) );
-//.. assign( hi64, unop(Iop_128HIto64, mkexpr(sV)) );
-//.. assign( lo64, unop(Iop_128to64, mkexpr(sV)) );
-//..=20
-//.. if (imm =3D=3D 8) {
-//.. assign( lo64r, mkU64(0) );
-//.. assign( hi64r, mkexpr(lo64) );
-//.. }
-//.. else
-//.. if (imm > 8) {
-//.. vassert(0); /* awaiting test case */
-//.. assign( lo64r, mkU64(0) );
-//.. assign( hi64r, binop( Iop_Shl64,=20
-//.. mkexpr(lo64),
-//.. mkU8( 8*(imm-8) ) ));
-//.. } else {
-//.. assign( lo64r, binop( Iop_Shl64,=20
-//.. mkexpr(lo64),
-//.. mkU8(8 * imm) ));
-//.. assign( hi64r,=20
-//.. binop( Iop_Or64,
-//.. binop(Iop_Shl64, mkexpr(hi64),=20
-//.. mkU8(8 * imm)),
-//.. binop(Iop_Shr64, mkexpr(lo64),
-//.. mkU8(8 * (8 - imm)) )
-//.. )
-//.. );
-//.. }
-//.. assign( dV, binop(Iop_64HLto128, mkexpr(hi64r), mkexpr(lo64r)=
) );
-//.. putXMMReg(reg, mkexpr(dV));
-//.. goto decode_success;
-//.. }
-//..=20
+
+ /* 66 0F 73 /7 ib =3D PSLLDQ by immediate */
+ /* note, if mem case ever filled in, 1 byte after amode */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x73
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) =3D=3D 7) {
+ IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
+ Int imm =3D (Int)insn[3];
+ Int reg =3D eregOfRexRM(pfx,insn[2]);
+ DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
+ vassert(imm >=3D 0 && imm <=3D 255);
+ delta +=3D 4;
+
+ sV =3D newTemp(Ity_V128);
+ dV =3D newTemp(Ity_V128);
+ hi64 =3D newTemp(Ity_I64);
+ lo64 =3D newTemp(Ity_I64);
+ hi64r =3D newTemp(Ity_I64);
+ lo64r =3D newTemp(Ity_I64);
+
+ if (imm >=3D 16) {
+ putXMMReg(reg, mkV128(0x0000));
+ goto decode_success;
+ }
+
+ assign( sV, getXMMReg(reg) );
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (imm =3D=3D 0) {
+ assign( lo64r, mkexpr(lo64) );
+ assign( hi64r, mkexpr(hi64) );
+ }
+ else
+ if (imm =3D=3D 8) {
+ assign( lo64r, mkU64(0) );
+ assign( hi64r, mkexpr(lo64) );
+ }
+ else
+ if (imm > 8) {
+ assign( lo64r, mkU64(0) );
+ assign( hi64r, binop( Iop_Shl64,=20
+ mkexpr(lo64),
+ mkU8( 8*(imm-8) ) ));
+ } else {
+ assign( lo64r, binop( Iop_Shl64,=20
+ mkexpr(lo64),
+ mkU8(8 * imm) ));
+ assign( hi64r,=20
+ binop( Iop_Or64,
+ binop(Iop_Shl64, mkexpr(hi64),=20
+ mkU8(8 * imm)),
+ binop(Iop_Shr64, mkexpr(lo64),
+ mkU8(8 * (8 - imm)) )
+ )
+ );
+ }
+ assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
+ putXMMReg(reg, mkexpr(dV));
+ goto decode_success;
+ }
+
//.. /* 66 0F 73 /6 ib =3D PSLLQ by immediate */
//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x73
//.. && epartIsReg(insn[2])
@@ -11163,65 +11187,70 @@
//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "psrld", Iop_Shr=
N32x4 );
//.. goto decode_success;
//.. }
-//..=20
-//.. /* 66 0F 73 /3 ib =3D PSRLDQ by immediate */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x73
-//.. && epartIsReg(insn[2])
-//.. && gregOfRM(insn[2]) =3D=3D 3) {
-//.. IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
-//.. Int imm =3D (Int)insn[3];
-//.. Int reg =3D eregOfRM(insn[2]);
-//.. DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
-//.. vassert(imm >=3D 0 && imm <=3D 255);
-//.. delta +=3D 4;
-//..=20
-//.. sV =3D newTemp(Ity_V128);
-//.. dV =3D newTemp(Ity_V128);
-//.. hi64 =3D newTemp(Ity_I64);
-//.. lo64 =3D newTemp(Ity_I64);
-//.. hi64r =3D newTemp(Ity_I64);
-//.. lo64r =3D newTemp(Ity_I64);
-//..=20
-//.. if (imm >=3D 16) {
-//.. vassert(0); /* awaiting test case */
-//.. putXMMReg(reg, mkV128(0x0000));
-//.. goto decode_success;
-//.. }
-//..=20
-//.. assign( sV, getXMMReg(reg) );
-//.. assign( hi64, unop(Iop_128HIto64, mkexpr(sV)) );
-//.. assign( lo64, unop(Iop_128to64, mkexpr(sV)) );
-//..=20
-//.. if (imm =3D=3D 8) {
-//.. assign( hi64r, mkU64(0) );
-//.. assign( lo64r, mkexpr(hi64) );
-//.. }
-//.. else=20
-//.. if (imm > 8) {
-//.. vassert(0); /* awaiting test case */
-//.. assign( hi64r, mkU64(0) );
-//.. assign( lo64r, binop( Iop_Shr64,=20
-//.. mkexpr(hi64),
-//.. mkU8( 8*(imm-8) ) ));
-//.. } else {
-//.. assign( hi64r, binop( Iop_Shr64,=20
-//.. mkexpr(hi64),
-//.. mkU8(8 * imm) ));
-//.. assign( lo64r,=20
-//.. binop( Iop_Or64,
-//.. binop(Iop_Shr64, mkexpr(lo64),=20
-//.. mkU8(8 * imm)),
-//.. binop(Iop_Shl64, mkexpr(hi64),
-//.. mkU8(8 * (8 - imm)) )
-//.. )
-//.. );
-//.. }
-//..=20
-//.. assign( dV, binop(Iop_64HLto128, mkexpr(hi64r), mkexpr(lo64r)=
) );
-//.. putXMMReg(reg, mkexpr(dV));
-//.. goto decode_success;
-//.. }
=20
+ /* 66 0F 73 /3 ib =3D PSRLDQ by immediate */
+ /* note, if mem case ever filled in, 1 byte after amode */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x73
+ && epartIsReg(insn[2])
+ && gregLO3ofRM(insn[2]) =3D=3D 3) {
+ IRTemp sV, dV, hi64, lo64, hi64r, lo64r;
+ Int imm =3D (Int)insn[3];
+ Int reg =3D eregOfRexRM(pfx,insn[2]);
+ DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
+ vassert(imm >=3D 0 && imm <=3D 255);
+ delta +=3D 4;
+
+ sV =3D newTemp(Ity_V128);
+ dV =3D newTemp(Ity_V128);
+ hi64 =3D newTemp(Ity_I64);
+ lo64 =3D newTemp(Ity_I64);
+ hi64r =3D newTemp(Ity_I64);
+ lo64r =3D newTemp(Ity_I64);
+
+ if (imm >=3D 16) {
+ putXMMReg(reg, mkV128(0x0000));
+ goto decode_success;
+ }
+
+ assign( sV, getXMMReg(reg) );
+ assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (imm =3D=3D 0) {
+ assign( lo64r, mkexpr(lo64) );
+ assign( hi64r, mkexpr(hi64) );
+ }
+ else
+ if (imm =3D=3D 8) {
+ assign( hi64r, mkU64(0) );
+ assign( lo64r, mkexpr(hi64) );
+ }
+ else=20
+ if (imm > 8) {
+ assign( hi64r, mkU64(0) );
+ assign( lo64r, binop( Iop_Shr64,=20
+ mkexpr(hi64),
+ mkU8( 8*(imm-8) ) ));
+ } else {
+ assign( hi64r, binop( Iop_Shr64,=20
+ mkexpr(hi64),
+ mkU8(8 * imm) ));
+ assign( lo64r,=20
+ binop( Iop_Or64,
+ binop(Iop_Shr64, mkexpr(lo64),=20
+ mkU8(8 * imm)),
+ binop(Iop_Shl64, mkexpr(hi64),
+ mkU8(8 * (8 - imm)) )
+ )
+ );
+ }
+
+ assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
+ putXMMReg(reg, mkexpr(dV));
+ goto decode_success;
+ }
+
/* 66 0F 73 /2 ib =3D PSRLQ by immediate */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x73
@@ -11250,21 +11279,23 @@
//.. delta =3D dis_SSE_shiftG_byE( sorb, delta+2, "psrlw", Iop_Shr=
N16x8 );
//.. goto decode_success;
//.. }
-//..=20
-//.. /* 66 0F F8 =3D PSUBB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF8) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "psubb", Iop_Sub8x16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F FA =3D PSUBD */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFA) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "psubd", Iop_Sub32x4, False );
-//.. goto decode_success;
-//.. }
=20
+ /* 66 0F F8 =3D PSUBB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF8) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "psubb", Iop_Sub8x16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F FA =3D PSUBD */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xFA) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "psubd", Iop_Sub32x4, False );
+ goto decode_success;
+ }
+
/* ***--- this is an MMX class insn introduced in SSE2 ---*** */
/* 0F FB =3D PSUBQ -- sub 64x1 */
if (haveNo66noF2noF3(pfx) && sz =3D=3D 4=20
@@ -11283,105 +11314,118 @@
goto decode_success;
}
=20
-//.. /* 66 0F F9 =3D PSUBW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF9) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "psubw", Iop_Sub16x8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F E8 =3D PSUBSB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE8) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "psubsb", Iop_QSub8Sx16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F E9 =3D PSUBSW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE9) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "psubsw", Iop_QSub16Sx8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F D8 =3D PSUBSB */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD8) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "psubusb", Iop_QSub8Ux16, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F D9 =3D PSUBSW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD9) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "psubusw", Iop_QSub16Ux8, False );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 68 =3D PUNPCKHBW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x68) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "punpckhbw",
-//.. Iop_InterleaveHI8x16, True );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 6A =3D PUNPCKHDQ */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6A) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "punpckhdq",
-//.. Iop_InterleaveHI32x4, True );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 6D =3D PUNPCKHQDQ */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6D) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "punpckhqdq",
-//.. Iop_InterleaveHI64x2, True );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 69 =3D PUNPCKHWD */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x69) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "punpckhwd",
-//.. Iop_InterleaveHI16x8, True );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 60 =3D PUNPCKLBW */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x60) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "punpcklbw",
-//.. Iop_InterleaveLO8x16, True );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 62 =3D PUNPCKLDQ */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x62) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "punpckldq",
-//.. Iop_InterleaveLO32x4, True );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 6C =3D PUNPCKLQDQ */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6C) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "punpcklqdq",
-//.. Iop_InterleaveLO64x2, True );
-//.. goto decode_success;
-//.. }
-//..=20
-//.. /* 66 0F 61 =3D PUNPCKLWD */
-//.. if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x61) {
-//.. delta =3D dis_SSEint_E_to_G( sorb, delta+2,=20
-//.. "punpcklwd",
-//.. Iop_InterleaveLO16x8, True );
-//.. goto decode_success;
-//.. }
+ /* 66 0F F9 =3D PSUBW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF9) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "psubw", Iop_Sub16x8, False );
+ goto decode_success;
+ }
=20
+ /* 66 0F E8 =3D PSUBSB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE8) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "psubsb", Iop_QSub8Sx16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F E9 =3D PSUBSW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xE9) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "psubsw", Iop_QSub16Sx8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F D8 =3D PSUBUSB */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD8) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "psubusb", Iop_QSub8Ux16, False );
+ goto decode_success;
+ }
+
+ /* 66 0F D9 =3D PSUBUSW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD9) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "psubusw", Iop_QSub16Ux8, False );
+ goto decode_success;
+ }
+
+ /* 66 0F 68 =3D PUNPCKHBW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x68) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "punpckhbw",
+ Iop_InterleaveHI8x16, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 6A =3D PUNPCKHDQ */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6A) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "punpckhdq",
+ Iop_InterleaveHI32x4, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 6D =3D PUNPCKHQDQ */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6D) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "punpckhqdq",
+ Iop_InterleaveHI64x2, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 69 =3D PUNPCKHWD */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x69) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "punpckhwd",
+ Iop_InterleaveHI16x8, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 60 =3D PUNPCKLBW */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x60) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "punpcklbw",
+ Iop_InterleaveLO8x16, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 62 =3D PUNPCKLDQ */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x62) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "punpckldq",
+ Iop_InterleaveLO32x4, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 6C =3D PUNPCKLQDQ */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x6C) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "punpcklqdq",
+ Iop_InterleaveLO64x2, True );
+ goto decode_success;
+ }
+
+ /* 66 0F 61 =3D PUNPCKLWD */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x61) {
+ delta =3D dis_SSEint_E_to_G( pfx, delta+2,=20
+ "punpcklwd",
+ Iop_InterleaveLO16x8, True );
+ goto decode_success;
+ }
+
/* 66 0F EF =3D PXOR */
if (have66noF2noF3(pfx) && sz =3D=3D 2=20
&& insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xEF) {
Modified: trunk/priv/host-amd64/hdefs.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-amd64/hdefs.c 2005-05-10 20:08:34 UTC (rev 1177)
+++ trunk/priv/host-amd64/hdefs.c 2005-05-10 22:42:54 UTC (rev 1178)
@@ -603,22 +603,22 @@
case Asse_OR: return "or";
case Asse_XOR: return "xor";
case Asse_ANDN: return "andn";
-//.. case Xsse_ADD8: return "paddb";
-//.. case Xsse_ADD16: return "paddw";
-//.. case Xsse_ADD32: return "paddd";
+ case Asse_ADD8: return "paddb";
+ case Asse_ADD16: return "paddw";
+ case Asse_ADD32: return "paddd";
case Asse_ADD64: return "paddq";
//.. case Xsse_QADD8U: return "paddusb";
//.. case Xsse_QADD16U: return "paddusw";
//.. case Xsse_QADD8S: return "paddsb";
//.. case Xsse_QADD16S: return "paddsw";
-//.. case Xsse_SUB8: return "psubb";
-//.. case Xsse_SUB16: return "psubw";
-//.. case Xsse_SUB32: return "psubd";
+ case Asse_SUB8: return "psubb";
+ case Asse_SUB16: return "psubw";
+ case Asse_SUB32: return "psubd";
case Asse_SUB64: return "psubq";
-//.. case Xsse_QSUB8U: return "psubusb";
-//.. case Xsse_QSUB16U: return "psubusw";
-//.. case Xsse_QSUB8S: return "psubsb";
-//.. case Xsse_QSUB16S: return "psubsw";
+ case Asse_QSUB8U: return "psubusb";
+ case Asse_QSUB16U: return "psubusw";
+ case Asse_QSUB8S: return "psubsb";
+ case Asse_QSUB16S: return "psubsw";
//.. case Xsse_MUL16: return "pmullw";
//.. case Xsse_MULHI16U: return "pmulhuw";
//.. case Xsse_MULHI16S: return "pmulhw";
@@ -642,17 +642,17 @@
case Asse_SHR64: return "psrlq";
//.. case Xsse_SAR16: return "psraw";
//.. case Xsse_SAR32: return "psrad";
-//.. case Xsse_PACKSSD: return "packssdw";
-//.. case Xsse_PACKSSW: return "packsswb";
-//.. case Xsse_PACKUSW: return "packuswb";
-//.. case Xsse_UNPCKHB: return "punpckhb";
-//.. case Xsse_UNPCKHW: return "punpckhw";
-//.. case Xsse_UNPCKHD: return "punpckhd";
-//.. case Xsse_UNPCKHQ: return "punpckhq";
-//.. case Xsse_UNPCKLB: return "punpcklb";
-//.. case Xsse_UNPCKLW: return "punpcklw";
-//.. case Xsse_UNPCKLD: return "punpckld";
-//.. case Xsse_UNPCKLQ: return "punpcklq";
+ case Asse_PACKSSD: return "packssdw";
+ case Asse_PACKSSW: return "packsswb";
+ case Asse_PACKUSW: return "packuswb";
+ case Asse_UNPCKHB: return "punpckhb";
+ case Asse_UNPCKHW: return "punpckhw";
+ case Asse_UNPCKHD: return "punpckhd";
+ case Asse_UNPCKHQ: return "punpckhq";
+ case Asse_UNPCKLB: return "punpcklb";
+ case Asse_UNPCKLW: return "punpcklw";
+ case Asse_UNPCKLD: return "punpckld";
+ case Asse_UNPCKLQ: return "punpcklq";
default: vpanic("showAMD64SseOp");
}
}
@@ -3228,11 +3228,11 @@
case Asse_MULF: *p++ =3D 0x59; break;
//.. case Xsse_RCPF: *p++ =3D 0x53; break;
//.. case Xsse_RSQRTF: *p++ =3D 0x52; break;
-//.. case Xsse_SQRTF: *p++ =3D 0x51; break;
+ case Asse_SQRTF: *p++ =3D 0x51; break;
case Asse_SUBF: *p++ =3D 0x5C; break;
-//.. case Xsse_CMPEQF: *p++ =3D 0xC2; xtra =3D 0x100; break;
-//.. case Xsse_CMPLTF: *p++ =3D 0xC2; xtra =3D 0x101; break;
-//.. case Xsse_CMPLEF: *p++ =3D 0xC2; xtra =3D 0x102; break;
+ case Asse_CMPEQF: *p++ =3D 0xC2; xtra =3D 0x100; break;
+ case Asse_CMPLTF: *p++ =3D 0xC2; xtra =3D 0x101; break;
+ case Asse_CMPLEF: *p++ =3D 0xC2; xtra =3D 0x102; break;
default: goto bad;
}
p =3D doAMode_R(p, vreg2ireg(i->Ain.Sse64Fx2.dst),
@@ -3310,12 +3310,12 @@
case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); brea=
k;
case Asse_AND: XX(rex); XX(0x0F); XX(0x54); brea=
k;
case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); brea=
k;
-//.. case Xsse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B);=
break;
-//.. case Xsse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63);=
break;
-//.. case Xsse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67);=
break;
-//.. case Xsse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC);=
break;
+ case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); brea=
k;
+ case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); brea=
k;
+ case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); brea=
k;
+ case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); brea=
k;
//.. case Xsse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD);=
break;
-//.. case Xsse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE);=
break;
+ case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); brea=
k;
case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); brea=
k;
//.. case Xsse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC);=
break;
//.. case Xsse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED);=
break;
@@ -3344,22 +3344,22 @@
//.. case Xsse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
//.. case Xsse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
-//.. case Xsse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
-//.. case Xsse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
-//.. case Xsse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
+ case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
+ case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
+ case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
-//.. case Xsse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
-//.. case Xsse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
-//.. case Xsse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
-//.. case Xsse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
-//.. case Xsse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
-//.. case Xsse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
-//.. case Xsse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
-//.. case Xsse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
-//.. case Xsse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
-//.. case Xsse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
-//.. case Xsse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
-//.. case Xsse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
+ case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
+ case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
+ case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
+ case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
+ case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
+ case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
+ case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
+ case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
+ case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
+ case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
+ case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
+ case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
default: goto bad;
}
p = doAMode_R(p, vreg2ireg(i->Ain.SseReRg.dst),
Modified: trunk/priv/host-amd64/hdefs.h
===================================================================
--- trunk/priv/host-amd64/hdefs.h 2005-05-10 20:08:34 UTC (rev 1177)
+++ trunk/priv/host-amd64/hdefs.h 2005-05-10 22:42:54 UTC (rev 1178)
@@ -323,14 +323,12 @@
/* Bitwise */
Asse_AND, Asse_OR, Asse_XOR, Asse_ANDN,
//.. /* Integer binary */
-//.. Xsse_ADD8, Xsse_ADD16, Xsse_ADD32,
- Asse_ADD64,
+ Asse_ADD8, Asse_ADD16, Asse_ADD32, Asse_ADD64,
//.. Xsse_QADD8U, Xsse_QADD16U,
//.. Xsse_QADD8S, Xsse_QADD16S,
-//.. Xsse_SUB8, Xsse_SUB16, Xsse_SUB32,
- Asse_SUB64,
-//.. Xsse_QSUB8U, Xsse_QSUB16U,
-//.. Xsse_QSUB8S, Xsse_QSUB16S,
+ Asse_SUB8, Asse_SUB16, Asse_SUB32, Asse_SUB64,
+ Asse_QSUB8U, Asse_QSUB16U,
+ Asse_QSUB8S, Asse_QSUB16S,
//.. Xsse_MUL16,
//.. Xsse_MULHI16U,
//.. Xsse_MULHI16S,
@@ -346,9 +344,9 @@
//.. Xsse_SHR16, Xsse_SHR32, 
Asse_SHR64,
//.. Xsse_SAR16, Xsse_SAR32, 
-//.. Xsse_PACKSSD, Xsse_PACKSSW, Xsse_PACKUSW,
-//.. Xsse_UNPCKHB, Xsse_UNPCKHW, Xsse_UNPCKHD, Xsse_UNPCKHQ,
-//.. Xsse_UNPCKLB, Xsse_UNPCKLW, Xsse_UNPCKLD, Xsse_UNPCKLQ
+ Asse_PACKSSD, Asse_PACKSSW, Asse_PACKUSW,
+ Asse_UNPCKHB, Asse_UNPCKHW, Asse_UNPCKHD, Asse_UNPCKHQ,
+ Asse_UNPCKLB, Asse_UNPCKLW, Asse_UNPCKLD, Asse_UNPCKLQ
}
AMD64SseOp;
 
Modified: trunk/priv/host-amd64/isel.c
===================================================================
--- trunk/priv/host-amd64/isel.c 2005-05-10 20:08:34 UTC (rev 1177)
+++ trunk/priv/host-amd64/isel.c 2005-05-10 22:42:54 UTC (rev 1178)
@@ -3191,16 +3191,15 @@
}
 
//.. case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
-//.. case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
-//.. case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary;
-//.. do_64Fx2_unary:
-//.. {
-//.. HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
-//.. HReg dst = newVRegV(env);
-//.. REQUIRE_SSE2;
-//.. addInstr(env, X86Instr_Sse64Fx2(op, arg, dst));
-//.. return dst;
-//.. }
+//.. case Iop_RSqrt64Fx2: op = Asse_RSQRTF; goto do_64Fx2_unary;
+ case Iop_Sqrt64Fx2: op = Asse_SQRTF; goto do_64Fx2_unary;
+ do_64Fx2_unary:
+ {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_Sse64Fx2(op, arg, dst));
+ return dst;
+ }
 
case Iop_Recip32F0x4: op =3D Asse_RCPF; goto do_32F0x4_unary;
case Iop_RSqrt32F0x4: op =3D Asse_RSQRTF; goto do_32F0x4_unary;
@@ -3317,9 +3316,9 @@
return dst;
}
 
-//.. case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
-//.. case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
-//.. case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
+ case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
+ case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
+ case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2;
//.. case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2;
//.. case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
@@ -3372,37 +3371,37 @@
return dst;
}
 
-//.. case Iop_QNarrow32Sx4: 
-//.. op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
-//.. case Iop_QNarrow16Sx8: 
-//.. op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
-//.. case Iop_QNarrow16Ux8: 
-//.. op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
-//.. 
-//.. case Iop_InterleaveHI8x16: 
-//.. op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
-//.. case Iop_InterleaveHI16x8: 
-//.. op = Xsse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
-//.. case Iop_InterleaveHI32x4: 
-//.. op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
-//.. case Iop_InterleaveHI64x2: 
-//.. op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
-//.. 
-//.. case Iop_InterleaveLO8x16: 
-//.. op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
-//.. case Iop_InterleaveLO16x8: 
-//.. op = Xsse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
-//.. case Iop_InterleaveLO32x4: 
-//.. op = Xsse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
-//.. case Iop_InterleaveLO64x2: 
-//.. op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
-//.. 
+ case Iop_QNarrow32Sx4: 
+ op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
+ case Iop_QNarrow16Sx8: 
+ op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
+ case Iop_QNarrow16Ux8: 
+ op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
+
+ case Iop_InterleaveHI8x16: 
+ op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveHI16x8: 
+ op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveHI32x4: 
+ op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveHI64x2: 
+ op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;
+
+ case Iop_InterleaveLO8x16: 
+ op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveLO16x8: 
+ op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveLO32x4: 
+ op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
+ case Iop_InterleaveLO64x2: 
+ op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
+
case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
case Iop_OrV128: op = Asse_OR; goto do_SseReRg;
case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
-//.. case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
+ case Iop_Add8x16: op = Asse_ADD8; goto do_SseReRg;
//.. case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg;
-//.. case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg;
+ case Iop_Add32x4: op = Asse_ADD32; goto do_SseReRg;
case Iop_Add64x2: op = Asse_ADD64; goto do_SseReRg;
//.. case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg;
//.. case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg;
@@ -3423,20 +3422,19 @@
//.. case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg;
//.. case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg;
//.. case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg;
-//.. case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg;
-//.. case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg;
-//.. case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg;
+ case Iop_Sub8x16: op = Asse_SUB8; goto do_SseReRg;
+ case Iop_Sub16x8: op = Asse_SUB16; goto do_SseReRg;
+ case Iop_Sub32x4: op = Asse_SUB32; goto do_SseReRg;
case Iop_Sub64x2: op = Asse_SUB64; goto do_SseReRg;
-//.. case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg;
-//.. case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg;
-//.. case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg;
-//.. case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg;
+ case Iop_QSub8Sx16: op = Asse_QSUB8S; goto do_SseReRg;
+ case Iop_QSub16Sx8: op = Asse_QSUB16S; goto do_SseReRg;
+ case Iop_QSub8Ux16: op = Asse_QSUB8U; goto do_SseReRg;
+ case Iop_QSub16Ux8: op = Asse_QSUB16U; goto do_SseReRg;
do_SseReRg: {
HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegV(env);
if (arg1isEReg) {
- goto vec_fail; /* awaiting test case */
addInstr(env, mk_vMOVsd_RR(arg2, dst));
addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
} else {
|