|
From: <sv...@va...> - 2006-08-28 13:22:21
|
Author: sewardj
Date: 2006-08-28 14:22:14 +0100 (Mon, 28 Aug 2006)
New Revision: 1646
Log:
Merge r1635,6 (SSE3 support for x86 and amd64)
Modified:
branches/VEX_3_2_BRANCH/priv/guest-amd64/toIR.c
branches/VEX_3_2_BRANCH/priv/guest-x86/toIR.c
Modified: branches/VEX_3_2_BRANCH/priv/guest-amd64/toIR.c
===================================================================
--- branches/VEX_3_2_BRANCH/priv/guest-amd64/toIR.c 2006-08-28 13:19:06 UTC (rev 1645)
+++ branches/VEX_3_2_BRANCH/priv/guest-amd64/toIR.c 2006-08-28 13:22:14 UTC (rev 1646)
@@ -5127,6 +5127,13 @@
loadLE(Ity_I32, mkexpr(addr))));
break;
=20
+ case 1: /* FISTTPL m32 (SSE3) */
+ DIP("fisttpl %s\n", dis_buf);
+ storeLE( mkexpr(addr),=20
+ binop(Iop_F64toI32, mkU32(Irrm_ZERO), get_ST(0))=
);
+ fp_pop();
+ break;
+
case 2: /* FIST m32 */
DIP("fistl %s\n", dis_buf);
storeLE( mkexpr(addr),=20
@@ -5444,6 +5451,13 @@
put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
break;
=20
+ case 1: /* FISTTPQ m64 (SSE3) */
+ DIP("fistppll %s\n", dis_buf);
+ storeLE( mkexpr(addr),=20
+ binop(Iop_F64toI64, mkU32(Irrm_ZERO), get_ST(0))=
);
+ fp_pop();
+ break;
+
case 2: /* FST double-real */
DIP("fstl %s\n", dis_buf);
storeLE(mkexpr(addr), get_ST(0));
@@ -5776,6 +5790,14 @@
loadLE(Ity_I16, mkexpr(addr)))));
break;
=20
+ case 1: /* FISTTPS m16 (SSE3) */
+ DIP("fisttps %s\n", dis_buf);
+ storeLE( mkexpr(addr),=20
+ unop(Iop_32to16,
+ binop(Iop_F64toI32, mkU32(Irrm_ZERO), get_S=
T(0))) );
+ fp_pop();
+ break;
+
//.. case 2: /* FIST m16 */
//.. DIP("fistp %s\n", dis_buf);
//.. storeLE( mkexpr(addr),=20
@@ -11745,11 +11767,255 @@
goto decode_success;
}
=20
-
/* ---------------------------------------------------- */
/* --- end of the SSE/SSE2 decoder. --- */
/* ---------------------------------------------------- */
=20
+ /* ---------------------------------------------------- */
+ /* --- start of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (2:2:0:0). */
+ /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating some lanes (3:3:1:1). */
+ if (haveF3no66noF2(pfx) && sz =3D=3D 4
+ && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x12 || insn[1] =3D=3D =
0x16)) {
+ IRTemp s3, s2, s1, s0;
+ IRTemp sV =3D newTemp(Ity_V128);
+ Bool isH =3D insn[1] =3D=3D 0x16;
+ s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INVALID;
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("movs%cdup %s,%s\n", isH ? 'h' : 'l',
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ isH ? mk128from32s( s3, s3, s1, s1 )
+ : mk128from32s( s2, s2, s0, s0 ) );
+ goto decode_success;
+ }
+
+ /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating the low 64 bits into both halves (32-bit lanes 1:0:1:0). */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0x12) {
+ IRTemp sV =3D newTemp(Ity_V128);
+ IRTemp d0 =3D newTemp(Ity_I64);
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("movddup %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movddup %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
+ goto decode_success;
+ }
+
+ /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD0) {
+ IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp addV =3D newTemp(Ity_V128);
+ IRTemp subV =3D newTemp(Ity_V128);
+ a3 =3D a2 =3D a1 =3D a0 =3D s3 =3D s2 =3D s1 =3D s0 =3D IRTemp_INV=
ALID;
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("addsubps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
+
+ breakup128to32s( addV, &a3, &a2, &a1, &a0 );
+ breakup128to32s( subV, &s3, &s2, &s1, &s0 );
+
+ putXMMReg( gregOfRexRM(pfx,modrm), mk128from32s( a3, s2, a1, s0 ))=
;
+ goto decode_success;
+ }
+
+ /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD0) {
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp addV =3D newTemp(Ity_V128);
+ IRTemp subV =3D newTemp(Ity_V128);
+ IRTemp a1 =3D newTemp(Ity_I64);
+ IRTemp s0 =3D newTemp(Ity_I64);
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubpd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
+
+ assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
+ assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
+
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
+ goto decode_success;
+ }
+
+ /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
+ /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x7C || insn[1] =3D=3D =
0x7D)) {
+ IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp leftV =3D newTemp(Ity_V128);
+ IRTemp rightV =3D newTemp(Ity_V128);
+ Bool isAdd =3D insn[1] =3D=3D 0x7C;
+ HChar* str =3D isAdd ? "add" : "sub";
+ e3 =3D e2 =3D e1 =3D e0 =3D g3 =3D g2 =3D g1 =3D g0 =3D IRTemp_INV=
ALID;
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%sps %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ breakup128to32s( eV, &e3, &e2, &e1, &e0 );
+ breakup128to32s( gV, &g3, &g2, &g1, &g0 );
+
+ assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
+ assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
+
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,=20
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
+ /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
+ if (have66noF2noF3(pfx) && sz =3D=3D 2=20
+ && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x7C || insn[1] =3D=3D =
0x7D)) {
+ IRTemp e1 =3D newTemp(Ity_I64);
+ IRTemp e0 =3D newTemp(Ity_I64);
+ IRTemp g1 =3D newTemp(Ity_I64);
+ IRTemp g0 =3D newTemp(Ity_I64);
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp leftV =3D newTemp(Ity_V128);
+ IRTemp rightV =3D newTemp(Ity_V128);
+ Bool isAdd =3D insn[1] =3D=3D 0x7C;
+ HChar* str =3D isAdd ? "add" : "sub";
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%spd %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
+ assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
+ assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
+ assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
+
+ assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
+ assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
+
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,=20
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* F2 0F F0 = LDDQU -- move from E (mem only; a register E is rejected) to G (xmm). */
+ if (haveF2no66noF3(pfx) && sz =3D=3D 4=20
+ && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xF0) {
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ goto decode_failure;
+ } else {
+ addr =3D disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ putXMMReg( gregOfRexRM(pfx,modrm),=20
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("lddqu %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta +=3D 2+alen;
+ }
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
/*after_sse_decoders:*/
=20
/* Get the primary opcode. */
Modified: branches/VEX_3_2_BRANCH/priv/guest-x86/toIR.c
===================================================================
--- branches/VEX_3_2_BRANCH/priv/guest-x86/toIR.c 2006-08-28 13:19:06 UTC (rev 1645)
+++ branches/VEX_3_2_BRANCH/priv/guest-x86/toIR.c 2006-08-28 13:22:14 UTC (rev 1646)
@@ -4274,6 +4274,13 @@
loadLE(Ity_I32, mkexpr(addr))));
break;
=20
+ case 1: /* FISTTPL m32 (SSE3) */
+ DIP("fisttpl %s\n", dis_buf);
+ storeLE( mkexpr(addr),=20
+ binop(Iop_F64toI32, mkU32(Irrm_ZERO), get_ST(0))=
);
+ fp_pop();
+ break;
+
case 2: /* FIST m32 */
DIP("fistl %s\n", dis_buf);
storeLE( mkexpr(addr),=20
@@ -4576,6 +4583,13 @@
put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
break;
=20
+ case 1: /* FISTTPQ m64 (SSE3) */
+ DIP("fistppll %s\n", dis_buf);
+ storeLE( mkexpr(addr),=20
+ binop(Iop_F64toI64, mkU32(Irrm_ZERO), get_ST(0))=
);
+ fp_pop();
+ break;
+
case 2: /* FST double-real */
DIP("fstl %s\n", dis_buf);
storeLE(mkexpr(addr), get_ST(0));
@@ -4939,6 +4953,13 @@
loadLE(Ity_I16, mkexpr(addr)))));
break;
=20
+ case 1: /* FISTTPS m16 (SSE3) */
+ DIP("fisttps %s\n", dis_buf);
+ storeLE( mkexpr(addr),=20
+ binop(Iop_F64toI16, mkU32(Irrm_ZERO), get_ST(0))=
);
+ fp_pop();
+ break;
+
case 2: /* FIST m16 */
DIP("fistp %s\n", dis_buf);
storeLE( mkexpr(addr),=20
@@ -10603,6 +10624,31 @@
goto decode_success;
}
=20
+ /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
+ duplicating the low 64 bits into both halves (32-bit lanes 1:0:1:0). */
+ if (sz =3D=3D 4 && insn[0] =3D=3D 0xF2 && insn[1] =3D=3D 0x0F && insn=
[2] =3D=3D 0x12) {
+ IRTemp sV =3D newTemp(Ity_V128);
+ IRTemp d0 =3D newTemp(Ity_I64);
+
+ modrm =3D insn[3];
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRM(modrm)) );
+ DIP("movddup %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta +=3D 3+1;
+ assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
+ } else {
+ addr =3D disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movddup %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta +=3D 3+alen;
+ }
+
+ putXMMReg( gregOfRM(modrm), binop(Iop_64HLtoV128,mkexpr(d0),mkexpr=
(d0)) );
+ goto decode_success;
+ }
+
/* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
if (sz =3D=3D 4 && insn[0] =3D=3D 0xF2 && insn[1] =3D=3D 0x0F && insn=
[2] =3D=3D 0xD0) {
IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
@@ -10638,6 +10684,143 @@
goto decode_success;
}
=20
+ /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
+ if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && insn[1] =3D=3D 0xD0) {
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp addV =3D newTemp(Ity_V128);
+ IRTemp subV =3D newTemp(Ity_V128);
+ IRTemp a1 =3D newTemp(Ity_I64);
+ IRTemp s0 =3D newTemp(Ity_I64);
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRM(modrm)) );
+ DIP("addsubpd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("addsubpd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRM(modrm)) );
+
+ assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
+ assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
+
+ assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
+ assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
+
+ putXMMReg( gregOfRM(modrm),=20
+ binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
+ goto decode_success;
+ }
+
+ /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
+ /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
+ if (sz =3D=3D 4 && insn[0] =3D=3D 0xF2 && insn[1] =3D=3D 0x0F=20
+ && (insn[2] =3D=3D 0x7C || insn[2] =3D=3D 0x7D)) {
+ IRTemp e3, e2, e1, e0, g3, g2, g1, g0;
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp leftV =3D newTemp(Ity_V128);
+ IRTemp rightV =3D newTemp(Ity_V128);
+ Bool isAdd =3D insn[2] =3D=3D 0x7C;
+ HChar* str =3D isAdd ? "add" : "sub";
+ e3 =3D e2 =3D e1 =3D e0 =3D g3 =3D g2 =3D g1 =3D g0 =3D IRTemp_INV=
ALID;
+
+ modrm =3D insn[3];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRM(modrm)) );
+ DIP("h%sps %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta +=3D 3+1;
+ } else {
+ addr =3D disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%sps %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta +=3D 3+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRM(modrm)) );
+
+ breakup128to32s( eV, &e3, &e2, &e1, &e0 );
+ breakup128to32s( gV, &g3, &g2, &g1, &g0 );
+
+ assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
+ assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
+
+ putXMMReg( gregOfRM(modrm),=20
+ binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,=20
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
+ /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
+ if (sz =3D=3D 2 && insn[0] =3D=3D 0x0F && (insn[1] =3D=3D 0x7C || ins=
n[1] =3D=3D 0x7D)) {
+ IRTemp e1 =3D newTemp(Ity_I64);
+ IRTemp e0 =3D newTemp(Ity_I64);
+ IRTemp g1 =3D newTemp(Ity_I64);
+ IRTemp g0 =3D newTemp(Ity_I64);
+ IRTemp eV =3D newTemp(Ity_V128);
+ IRTemp gV =3D newTemp(Ity_V128);
+ IRTemp leftV =3D newTemp(Ity_V128);
+ IRTemp rightV =3D newTemp(Ity_V128);
+ Bool isAdd =3D insn[1] =3D=3D 0x7C;
+ HChar* str =3D isAdd ? "add" : "sub";
+
+ modrm =3D insn[2];
+ if (epartIsReg(modrm)) {
+ assign( eV, getXMMReg( eregOfRM(modrm)) );
+ DIP("h%spd %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta +=3D 2+1;
+ } else {
+ addr =3D disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("h%spd %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta +=3D 2+alen;
+ }
+
+ assign( gV, getXMMReg(gregOfRM(modrm)) );
+
+ assign( e1, unop(Iop_V128HIto64, mkexpr(eV) ));
+ assign( e0, unop(Iop_V128to64, mkexpr(eV) ));
+ assign( g1, unop(Iop_V128HIto64, mkexpr(gV) ));
+ assign( g0, unop(Iop_V128to64, mkexpr(gV) ));
+
+ assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
+ assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
+
+ putXMMReg( gregOfRM(modrm),=20
+ binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,=20
+ mkexpr(leftV), mkexpr(rightV) ) );
+ goto decode_success;
+ }
+
+ /* F2 0F F0 = LDDQU -- move from E (mem only; a register E is rejected) to G (xmm). */
+ if (sz =3D=3D 4 && insn[0] =3D=3D 0xF2 && insn[1] =3D=3D 0x0F && insn=
[2] =3D=3D 0xF0) {
+ modrm =3D getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ goto decode_failure;
+ } else {
+ addr =3D disAMode ( &alen, sorb, delta+3, dis_buf );
+ putXMMReg( gregOfRM(modrm),=20
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("lddqu %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta +=3D 3+alen;
+ }
+ goto decode_success;
+ }
+
/* ---------------------------------------------------- */
/* --- end of the SSE3 decoder. --- */
/* ---------------------------------------------------- */
|