|
From: <sv...@va...> - 2006-08-04 14:51:24
|
Author: sewardj
Date: 2006-08-04 15:51:19 +0100 (Fri, 04 Aug 2006)
New Revision: 1636
Log:
64-bit equivalent to r1635: handle all SSE3 instructions except
monitor and mwait in 64-bit mode. Regression tests to follow soon.
Modified:
trunk/priv/guest-amd64/toIR.c
Modified: trunk/priv/guest-amd64/toIR.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/guest-amd64/toIR.c 2006-08-03 15:03:19 UTC (rev 1635)
+++ trunk/priv/guest-amd64/toIR.c 2006-08-04 14:51:19 UTC (rev 1636)
@@ -5127,6 +5127,13 @@
loadLE(Ity_I32, mkexpr(addr))));
break;
=20
+ case 1: /* FISTTPL m32 (SSE3) */
+ DIP("fisttpl %s\n", dis_buf);
+ storeLE( mkexpr(addr),=20
+ binop(Iop_F64toI32, mkU32(Irrm_ZERO), get_ST(0))=
);
+ fp_pop();
+ break;
+
case 2: /* FIST m32 */
DIP("fistl %s\n", dis_buf);
storeLE( mkexpr(addr),=20
@@ -5444,6 +5451,13 @@
put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
break;
=20
+ case 1: /* FISTTPQ m64 (SSE3) */
+ DIP("fistppll %s\n", dis_buf);
+ storeLE( mkexpr(addr),=20
+ binop(Iop_F64toI64, mkU32(Irrm_ZERO), get_ST(0))=
);
+ fp_pop();
+ break;
+
case 2: /* FST double-real */
DIP("fstl %s\n", dis_buf);
storeLE(mkexpr(addr), get_ST(0));
@@ -5776,6 +5790,14 @@
loadLE(Ity_I16, mkexpr(addr)))));
break;
=20
+ case 1: /* FISTTPS m16 (SSE3) */
+ DIP("fisttps %s\n", dis_buf);
+ storeLE( mkexpr(addr),=20
+ unop(Iop_32to16,
+ binop(Iop_F64toI32, mkU32(Irrm_ZERO), get_S=
T(0))) );
+ fp_pop();
+ break;
+
//.. case 2: /* FIST m16 */
//.. DIP("fistp %s\n", dis_buf);
//.. storeLE( mkexpr(addr),=20
@@ -11745,11 +11767,255 @@
goto decode_success;
}
=20
-
/* ---------------------------------------------------- */
/* --- end of the SSE/SSE2 decoder. --- */
/* ---------------------------------------------------- */
=20
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* F3 0F 12 =3D MOVSLDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (2:2:0:0). */
+ /* F3 0F 16 =3D MOVSHDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (3:3:1:1). */
+ if (haveF3no66noF2(pfx) && sz =3D=3D 4
+ && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x12 || insn[1] =3D=3D =
0x16)) {
+ IRTemp s3, s2, s1, s0;
+ IRTemp sV =3D newTemp(Ity_V128);
+ Bool isH =3D insn[1] =3D=3D 0x16;
+ s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ isH ? mk128from32s( s3, s3, s1, s1 )
+ : mk128from32s( s2, s2, s0, s0 ) );
+ goto decode_success;
+ }
+
+ /* F2 0F 12 =3D MOVDDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating the low 64 bits into both halves (1:0:1:0). */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x12) {
+ IRTemp sV =3D newTemp(Ity_V128);
+ IRTemp d0 =3D newTemp(Ity_I64);
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("movddup %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movddup %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
+ goto decode_success;
+ }
+
+ /* F2 0F D0 =3D ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xm=
m). */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD0) {
+ IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp addV =3D newTemp(Ity_V128);
+ IRTemp subV =3D newTemp(Ity_V128);
+ a3 =3D a2 =3D a1 =3D a0 =3D s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INV=
ALID;
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("addsubps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
+
+ breakup128to32s( addV, &a3, &a2, &a1, &a0 );
+ breakup128to32s( subV, &s3, &s2, &s1, &s0 );
+
+ putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( a3, s2, a1, s0 ))=
;
+ goto decode_success;
+ }
+
+ /* 66 0F D0 =3D ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). =
*/
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD0) {
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp addV =3D newTemp(Ity_V128);
+ IRTemp subV =3D newTemp(Ity_V128);
+ IRTemp a1 =3D newTemp(Ity_I64);
+ IRTemp s0 =3D newTemp(Ity_I64);
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubpd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
+
+ assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
+ assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
+
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
+ goto decode_success;
+ }
+
+ /* F2 0F 7D =3D HSUBPS -- 32x4 sub across from E (mem or xmm) to G (x=
mm). */
+ /* F2 0F 7C =3D HADDPS -- 32x4 add across from E (mem or xmm) to G (x=
mm). */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x7C || insn[1] =3D=3D =
0x7D)) {
+ IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp leftV =3D newTemp(Ity_V128);
+ IRTemp rightV =3D newTemp(Ity_V128);
+ Bool isAdd =3D insn[1] =3D=3D 0x7C;
+ HChar* str =3D isAdd ? "add" : "sub";
+ e3 =3D e2 =3D e1 =3D e0 =3D g3 =3D g2 =3D g1 =3D g0 =3D IRTemp_INV=
ALID;
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%sps %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ breakup128to32s( eV, &e3, &e2, &e1, &e0 );
+ breakup128to32s( gV, &g3, &g2, &g1, &g0 );
+
+ assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
+ assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
+
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,=20
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* 66 0F 7D =3D HSUBPD -- 64x2 sub across from E (mem or xmm) to G (x=
mm). */
+ /* 66 0F 7C =3D HADDPD -- 64x2 add across from E (mem or xmm) to G (x=
mm). */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x7C || insn[1] =3D=3D =
0x7D)) {
+ IRTemp e1 =3D newTemp(Ity_I64);
+ IRTemp e0 =3D newTemp(Ity_I64);
+ IRTemp g1 =3D newTemp(Ity_I64);
+ IRTemp g0 =3D newTemp(Ity_I64);
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp leftV =3D newTemp(Ity_V128);
+ IRTemp rightV =3D newTemp(Ity_V128);
+ Bool isAdd =3D insn[1] =3D=3D 0x7C;
+ HChar* str =3D isAdd ? "add" : "sub";
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%spd %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
+ assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
+ assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
+ assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
+
+ assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
+ assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
+
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,=20
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* F2 0F F0 =3D LDDQU -- move from E (mem only) to G (xmm). */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF0) {
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ goto decode_failure;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("lddqu %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
/*after_sse_decoders:*/
=20
/* Get the primary opcode. */
|