|
From: <sv...@va...> - 2005-05-08 23:03:56
|
Author: sewardj
Date: 2005-05-09 00:03:48 +0100 (Mon, 09 May 2005)
New Revision: 1170
Modified:
trunk/priv/guest-amd64/gdefs.h
trunk/priv/guest-amd64/toIR.c
trunk/priv/host-amd64/hdefs.c
trunk/priv/host-amd64/hdefs.h
trunk/priv/host-amd64/isel.c
Log:
Make a whole bunch more x87 instructions work on amd64.
Modified: trunk/priv/guest-amd64/gdefs.h
===================================================================
--- trunk/priv/guest-amd64/gdefs.h 2005-05-07 01:01:24 UTC (rev 1169)
+++ trunk/priv/guest-amd64/gdefs.h 2005-05-08 23:03:48 UTC (rev 1170)
@@ -168,10 +168,10 @@
#define AMD64G_CC_MASK_P (1 << AMD64G_CC_SHIFT_P)
=20
/* FPU flag masks */
-//#define AMD64G_FC_MASK_C3 (1 << 14)
-//#define AMD64G_FC_MASK_C2 (1 << 10)
-//#define AMD64G_FC_MASK_C1 (1 << 9)
-//#define AMD64G_FC_MASK_C0 (1 << 8)
+#define AMD64G_FC_MASK_C3 (1 << 14)
+#define AMD64G_FC_MASK_C2 (1 << 10)
+#define AMD64G_FC_MASK_C1 (1 << 9)
+#define AMD64G_FC_MASK_C0 (1 << 8)
=20
/* %RFLAGS thunk descriptors. A four-word thunk is used to record
details of the most recent flag-setting operation, so the flags can
Modified: trunk/priv/guest-amd64/toIR.c
===================================================================
--- trunk/priv/guest-amd64/toIR.c 2005-05-07 01:01:24 UTC (rev 1169)
+++ trunk/priv/guest-amd64/toIR.c 2005-05-08 23:03:48 UTC (rev 1170)
@@ -338,7 +338,7 @@
#define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP)
-//.. #define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)
+#define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
//..=20
//.. #define OFFB_CS offsetof(VexGuestX86State,guest_CS)
@@ -4124,18 +4124,19 @@
stmt( IRStmt_Put( OFFB_FTOP, e ) );
}
=20
-//.. /* --------- Get/put the C3210 bits. --------- */
-//..=20
-//.. static IRExpr* get_C3210 ( void )
-//.. {
-//.. return IRExpr_Get( OFFB_FC3210, Ity_I32 );
-//.. }
-//..=20
-//.. static void put_C3210 ( IRExpr* e )
-//.. {
-//.. stmt( IRStmt_Put( OFFB_FC3210, e ) );
-//.. }
+/* --------- Get/put the C3210 bits. --------- */
=20
+static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
+{
+ return IRExpr_Get( OFFB_FC3210, Ity_I64 );
+}
+
+static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
+{
+ vassert(typeOfIRExpr(irbb->tyenv, e) =3D=3D Ity_I64);
+ stmt( IRStmt_Put( OFFB_FC3210, e ) );
+}
+
/* --------- Get/put the FPU rounding mode. --------- */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
{
@@ -4257,15 +4258,15 @@
put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
}
=20
-//.. /* Clear the C2 bit of the FPU status register, for
-//.. sin/cos/tan/sincos. */
-//..=20
-//.. static void clear_C2 ( void )
-//.. {
-//.. put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)=
) );
-//.. }
+/* Clear the C2 bit of the FPU status register, for
+ sin/cos/tan/sincos. */
=20
+static void clear_C2 ( void )
+{
+ put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) )=
;
+}
=20
+
/* ------------------------------------------------------- */
/* Given all that stack-mangling junk, we can now go ahead
and describe FP instructions.=20
@@ -4824,18 +4825,18 @@
put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL=
)));
break;
=20
-//.. case 0xF0: /* F2XM1 */
-//.. DIP("f2xm1\n");
-//.. put_ST_UNCHECKED(0, unop(Iop_2xm1F64, get_ST(0)));
-//.. break;
-//..=20
-//.. case 0xF1: /* FYL2X */
-//.. DIP("fyl2x\n");
-//.. put_ST_UNCHECKED(1, binop(Iop_Yl2xF64,
-//.. get_ST(1), get_ST(0)));
-//.. fp_pop();
-//.. break;
-//..=20
+ case 0xF0: /* F2XM1 */
+ DIP("f2xm1\n");
+ put_ST_UNCHECKED(0, unop(Iop_2xm1F64, get_ST(0)));
+ break;
+
+ case 0xF1: /* FYL2X */
+ DIP("fyl2x\n");
+ put_ST_UNCHECKED(1, binop(Iop_Yl2xF64,
+ get_ST(1), get_ST(0)));
+ fp_pop();
+ break;
+
//.. case 0xF2: /* FPTAN */
//.. DIP("ftan\n");
//.. put_ST_UNCHECKED(0, unop(Iop_TanF64, get_ST(0)));
@@ -4843,14 +4844,14 @@
//.. put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
//.. clear_C2(); /* HACK */
//.. break;
-//..=20
-//.. case 0xF3: /* FPATAN */
-//.. DIP("fpatan\n");
-//.. put_ST_UNCHECKED(1, binop(Iop_AtanF64,
-//.. get_ST(1), get_ST(0)));
-//.. fp_pop();
-//.. break;
-//..=20
+
+ case 0xF3: /* FPATAN */
+ DIP("fpatan\n");
+ put_ST_UNCHECKED(1, binop(Iop_AtanF64,
+ get_ST(1), get_ST(0)));
+ fp_pop();
+ break;
+
//.. case 0xF5: { /* FPREM1 -- IEEE compliant */
//.. IRTemp a1 =3D newTemp(Ity_F64);
//.. IRTemp a2 =3D newTemp(Ity_F64);
@@ -4896,41 +4897,41 @@
put_ST_UNCHECKED(0, unop(Iop_SqrtF64, get_ST(0)));
break;
=20
-//.. case 0xFB: { /* FSINCOS */
-//.. IRTemp a1 =3D newTemp(Ity_F64);
-//.. assign( a1, get_ST(0) );
-//.. DIP("fsincos\n");
-//.. put_ST_UNCHECKED(0, unop(Iop_SinF64, mkexpr(a1)));
-//.. fp_push();
-//.. put_ST(0, unop(Iop_CosF64, mkexpr(a1)));
-//.. clear_C2(); /* HACK */
-//.. break;
-//.. }
-//..=20
-//.. case 0xFC: /* FRNDINT */
-//.. DIP("frndint\n");
-//.. put_ST_UNCHECKED(0,
-//.. binop(Iop_RoundF64, get_roundingmode(), get_ST(0)=
) );
-//.. break;
-//..=20
-//.. case 0xFD: /* FSCALE */
-//.. DIP("fscale\n");
-//.. put_ST_UNCHECKED(0, binop(Iop_ScaleF64,
-//.. get_ST(0), get_ST(1)));
-//.. break;
-//..=20
-//.. case 0xFE: /* FSIN */
-//.. DIP("fsin\n");
-//.. put_ST_UNCHECKED(0, unop(Iop_SinF64, get_ST(0)));
-//.. clear_C2(); /* HACK */
-//.. break;
-//..=20
-//.. case 0xFF: /* FCOS */
-//.. DIP("fcos\n");
-//.. put_ST_UNCHECKED(0, unop(Iop_CosF64, get_ST(0)));
-//.. clear_C2(); /* HACK */
-//.. break;
+ case 0xFB: { /* FSINCOS */
+ IRTemp a1 =3D newTemp(Ity_F64);
+ assign( a1, get_ST(0) );
+ DIP("fsincos\n");
+ put_ST_UNCHECKED(0, unop(Iop_SinF64, mkexpr(a1)));
+ fp_push();
+ put_ST(0, unop(Iop_CosF64, mkexpr(a1)));
+ clear_C2(); /* HACK */
+ break;
+ }
=20
+ case 0xFC: /* FRNDINT */
+ DIP("frndint\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_RoundF64, get_roundingmode(), get_ST(0)) );
+ break;
+
+ case 0xFD: /* FSCALE */
+ DIP("fscale\n");
+ put_ST_UNCHECKED(0, binop(Iop_ScaleF64,
+ get_ST(0), get_ST(1)));
+ break;
+
+ case 0xFE: /* FSIN */
+ DIP("fsin\n");
+ put_ST_UNCHECKED(0, unop(Iop_SinF64, get_ST(0)));
+ clear_C2(); /* HACK */
+ break;
+
+ case 0xFF: /* FCOS */
+ DIP("fcos\n");
+ put_ST_UNCHECKED(0, unop(Iop_CosF64, get_ST(0)));
+ clear_C2(); /* HACK */
+ break;
+
default:
goto decode_fail;
}
@@ -5037,6 +5038,16 @@
get_ST(0), get_ST(r_src)) );
break;
=20
+ case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
+ r_src =3D (UInt)modrm - 0xD8;
+ DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(0,=20
+ IRExpr_Mux0X(=20
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(A=
MD64CondP)),=20
+ get_ST(0), get_ST(r_src)) );
+ break;
+
//.. case 0xE9: /* FUCOMPP %st(0),%st(1) */
//.. DIP("fucompp %%st(0),%%st(1)\n");
//.. /* This forces C1 to zero, which isn't right. */
Modified: trunk/priv/host-amd64/hdefs.c
===================================================================
--- trunk/priv/host-amd64/hdefs.c 2005-05-07 01:01:24 UTC (rev 1169)
+++ trunk/priv/host-amd64/hdefs.c 2005-05-08 23:03:48 UTC (rev 1170)
@@ -558,30 +558,30 @@
}
}
=20
-//.. HChar* showAMD64FpOp ( AMD64FpOp op ) {
-//.. switch (op) {
+HChar* showA87FpOp ( A87FpOp op ) {
+ switch (op) {
//.. case Xfp_ADD: return "add";
//.. case Xfp_SUB: return "sub";
//.. case Xfp_MUL: return "mul";
//.. case Xfp_DIV: return "div";
-//.. case Xfp_SCALE: return "scale";
-//.. case Xfp_ATAN: return "atan";
-//.. case Xfp_YL2X: return "yl2x";
+ case Afp_SCALE: return "scale";
+ case Afp_ATAN: return "atan";
+ case Afp_YL2X: return "yl2x";
//.. case Xfp_YL2XP1: return "yl2xp1";
//.. case Xfp_PREM: return "prem";
//.. case Xfp_PREM1: return "prem1";
-//.. case Xfp_SQRT: return "sqrt";
+ case Afp_SQRT: return "sqrt";
//.. case Xfp_ABS: return "abs";
//.. case Xfp_NEG: return "chs";
//.. case Xfp_MOV: return "mov";
-//.. case Xfp_SIN: return "sin";
-//.. case Xfp_COS: return "cos";
+ case Afp_SIN: return "sin";
+ case Afp_COS: return "cos";
//.. case Xfp_TAN: return "tan";
-//.. case Xfp_ROUND: return "round";
-//.. case Xfp_2XM1: return "2xm1";
-//.. default: vpanic("showAMD64FpOp");
-//.. }
-//.. }
+ case Afp_ROUND: return "round";
+ case Afp_2XM1: return "2xm1";
+ default: vpanic("showA87FpOp");
+ }
+}
=20
HChar* showAMD64SseOp ( AMD64SseOp op ) {
switch (op) {
@@ -807,10 +807,40 @@
}
AMD64Instr* AMD64Instr_MFence ( void )
{
- AMD64Instr* i =3D LibVEX_Alloc(sizeof(AMD64Instr));
- i->tag =3D Ain_MFence;
+ AMD64Instr* i =3D LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag =3D Ain_MFence;
return i;
}
+AMD64Instr* AMD64Instr_A87Free ( Int nregs )
+{
+ AMD64Instr* i =3D LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag =3D Ain_A87Free;
+ i->Ain.A87Free.nregs =3D nregs;
+ vassert(nregs >=3D 1 && nregs <=3D 7);
+ return i;
+}
+AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush )
+{
+ AMD64Instr* i =3D LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag =3D Ain_A87PushPop;
+ i->Ain.A87PushPop.addr =3D addr;
+ i->Ain.A87PushPop.isPush =3D isPush;
+ return i;
+}
+AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
+{
+ AMD64Instr* i =3D LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag =3D Ain_A87FpOp;
+ i->Ain.A87FpOp.op =3D op;
+ return i;
+}
+AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
+{
+ AMD64Instr* i =3D LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag =3D Ain_A87LdCW;
+ i->Ain.A87LdCW.addr =3D addr;
+ return i;
+}
=20
//.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst )=
{
//.. AMD64Instr* i =3D LibVEX_Alloc(sizeof(AMD64Instr));
@@ -1147,6 +1177,20 @@
case Ain_MFence:
vex_printf("mfence" );
return;
+ case Ain_A87Free:
+ vex_printf("ffree %%st(7..%d)\n", 7 - i->Ain.A87Free.nregs );
+ break;
+ case Ain_A87PushPop:
+ vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl ");
+ ppAMD64AMode(i->Ain.A87PushPop.addr);
+ break;
+ case Ain_A87FpOp:
+ vex_printf("f%s\n", showA87FpOp(i->Ain.A87FpOp.op));
+ break;
+ case Ain_A87LdCW:
+ vex_printf("fldcw ");
+ ppAMD64AMode(i->Ain.A87LdCW.addr);
+ break;
//.. case Xin_FpUnary:
//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
//.. ppHRegAMD64(i->Xin.FpUnary.src);
@@ -1457,6 +1501,16 @@
return;
case Ain_MFence:
return;
+ case Ain_A87Free:
+ return;
+ case Ain_A87PushPop:
+ addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
+ return;
+ case Ain_A87FpOp:
+ return;
+ case Ain_A87LdCW:
+ addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
+ return;
//.. case Xin_FpUnary:
//.. addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
//.. addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
@@ -1583,7 +1637,7 @@
}
=20
/* local helper */
-static void mapReg(HRegRemap* m, HReg* r)
+static inline void mapReg(HRegRemap* m, HReg* r)
{
*r =3D lookupHRegRemap(m, *r);
}
@@ -1655,6 +1709,16 @@
return;
case Ain_MFence:
return;
+ case Ain_A87Free:
+ return;
+ case Ain_A87PushPop:
+ mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
+ return;
+ case Ain_A87FpOp:
+ return;
+ case Ain_A87LdCW:
+ mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
+ return;
//.. case Xin_FpUnary:
//.. mapReg(m, &i->Xin.FpUnary.src);
//.. mapReg(m, &i->Xin.FpUnary.dst);
@@ -2080,14 +2144,15 @@
}
=20
=20
-//.. /* Emit ffree %st(7) */
-//.. static UChar* do_ffree_st7 ( UChar* p )
-//.. {
-//.. *p++ =3D 0xDD;
-//.. *p++ =3D 0xC7;
-//.. return p;
-//.. }
-//..=20
+/* Emit ffree %st(N) */
+static UChar* do_ffree_st ( UChar* p, Int n )
+{
+ vassert(n >=3D 0 && n <=3D 7);
+ *p++ =3D 0xDD;
+ *p++ =3D toUChar(0xC0 + n);
+ return p;
+}
+
//.. /* Emit fstp %st(i), 1 <=3D i <=3D 7 */
//.. static UChar* do_fstp_st ( UChar* p, Int i )
//.. {
@@ -2187,6 +2252,7 @@
UChar rex;
UChar* p =3D &buf[0];
UChar* ptmp;
+ Int j;
vassert(nbuf >=3D 32);
=20
/* Wrap an integer as a int register, for use assembling
@@ -2744,6 +2810,51 @@
*p++ =3D 0x0F; *p++ =3D 0xAE; *p++ =3D 0xF0;
goto done;
=20
+ case Ain_A87Free:
+ vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <=3D 7);
+ for (j =3D 0; j < i->Ain.A87Free.nregs; j++) {
+ p =3D do_ffree_st(p, 7-j);
+ }
+ goto done;
+
+ case Ain_A87PushPop:
+ if (i->Ain.A87PushPop.isPush) {
+ /* Load from memory into %st(0): fldl amode */
+ *p++ =3D clearWBit(
+ rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
+ *p++ =3D 0xDD;
+ p =3D doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
+ } else {
+ /* Dump %st(0) to memory: fstpl amode */
+ *p++ =3D clearWBit(
+ rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
+ *p++ =3D 0xDD;
+ p =3D doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr=
);
+ goto done;
+ }
+ goto done;
+
+ case Ain_A87FpOp:
+ switch (i->Ain.A87FpOp.op) {
+ case Afp_SQRT: *p++ =3D 0xD9; *p++ =3D 0xFA; break;
+ case Afp_SIN: *p++ =3D 0xD9; *p++ =3D 0xFE; break;
+ case Afp_COS: *p++ =3D 0xD9; *p++ =3D 0xFF; break;
+ case Afp_ROUND: *p++ =3D 0xD9; *p++ =3D 0xFC; break;
+ case Afp_2XM1: *p++ =3D 0xD9; *p++ =3D 0xF0; break;
+ case Afp_SCALE: *p++ =3D 0xD9; *p++ =3D 0xFD; break;
+ case Afp_ATAN: *p++ =3D 0xD9; *p++ =3D 0xF3; break;
+ case Afp_YL2X: *p++ =3D 0xD9; *p++ =3D 0xF1; break;
+ default: goto bad;
+ }
+ goto done;
+
+ case Ain_A87LdCW:
+ *p++ =3D clearWBit(
+ rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
+ *p++ =3D 0xD9;
+ p =3D doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
+ goto done;
+
case Ain_Store:
if (i->Ain.Store.sz =3D=3D 2) {
/* This just goes to show the crazyness of the instruction
Modified: trunk/priv/host-amd64/hdefs.h
===================================================================
--- trunk/priv/host-amd64/hdefs.h 2005-05-07 01:01:24 UTC (rev 1169)
+++ trunk/priv/host-amd64/hdefs.h 2005-05-08 23:03:48 UTC (rev 1170)
@@ -291,22 +291,23 @@
extern HChar* showAMD64ShiftOp ( AMD64ShiftOp );
=20
=20
-//.. /* --------- */
-//.. typedef
-//.. enum {
-//.. Xfp_INVALID,
-//.. /* Binary */
+/* --------- */
+typedef
+ enum {
+ Afp_INVALID,
+ /* Binary */
//.. Xfp_ADD, Xfp_SUB, Xfp_MUL, Xfp_DIV,=20
-//.. Xfp_SCALE, Xfp_ATAN, Xfp_YL2X, Xfp_YL2XP1, Xfp_PREM, Xfp_PREM=
1,
-//.. /* Unary */
-//.. Xfp_SQRT, Xfp_ABS, Xfp_NEG, Xfp_MOV, Xfp_SIN, Xfp_COS, Xfp_TA=
N,
-//.. Xfp_ROUND, Xfp_2XM1
-//.. }
-//.. X86FpOp;
-//..=20
-//.. extern HChar* showX86FpOp ( X86FpOp );
+ Afp_SCALE, Afp_ATAN, Afp_YL2X, //Xfp_YL2XP1, Xfp_PREM, Xfp_PREM1,
+ /* Unary */
+ Afp_SQRT, //Xfp_ABS, Xfp_NEG, Xfp_MOV,=20
+ Afp_SIN, Afp_COS, //Xfp_TAN,
+ Afp_ROUND, Afp_2XM1
+ }
+ A87FpOp;
=20
+extern HChar* showA87FpOp ( A87FpOp );
=20
+
/* --------- */
typedef
enum {
@@ -357,25 +358,29 @@
/* --------- */
typedef
enum {
- Ain_Imm64, /* Generate 64-bit literal to register */
- Ain_Alu64R, /* 64-bit mov/arith/logical, dst=3DREG */
- Ain_Alu64M, /* 64-bit mov/arith/logical, dst=3DMEM */
- Ain_Sh64, /* 64-bit shift/rotate, dst=3DREG or MEM */
- Ain_Test64, /* 64-bit test (AND, set flags, discard result) */
- Ain_Unary64, /* 64-bit not and neg */
- Ain_MulL, /* widening multiply */
- Ain_Div, /* div and mod */
+ Ain_Imm64, /* Generate 64-bit literal to register */
+ Ain_Alu64R, /* 64-bit mov/arith/logical, dst=3DREG */
+ Ain_Alu64M, /* 64-bit mov/arith/logical, dst=3DMEM */
+ Ain_Sh64, /* 64-bit shift/rotate, dst=3DREG or MEM */
+ Ain_Test64, /* 64-bit test (AND, set flags, discard result) *=
/
+ Ain_Unary64, /* 64-bit not and neg */
+ Ain_MulL, /* widening multiply */
+ Ain_Div, /* div and mod */
//.. Xin_Sh3232, /* shldl or shrdl */
- Ain_Push, /* push 64-bit value on stack */
- Ain_Call, /* call to address in register */
- Ain_Goto, /* conditional/unconditional jmp to dst */
- Ain_CMov64, /* conditional move */
- Ain_MovZLQ, /* reg-reg move, zeroing out top half */
- Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
- Ain_Store, /* store 32/16/8 bit value in memory */
- Ain_Set64, /* convert condition code to 64-bit value */
- Ain_Bsfr64, /* 64-bit bsf/bsr */
- Ain_MFence, /* mem fence */
+ Ain_Push, /* push 64-bit value on stack */
+ Ain_Call, /* call to address in register */
+ Ain_Goto, /* conditional/unconditional jmp to dst */
+ Ain_CMov64, /* conditional move */
+ Ain_MovZLQ, /* reg-reg move, zeroing out top half */
+ Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
+ Ain_Store, /* store 32/16/8 bit value in memory */
+ Ain_Set64, /* convert condition code to 64-bit value */
+ Ain_Bsfr64, /* 64-bit bsf/bsr */
+ Ain_MFence, /* mem fence */
+ Ain_A87Free, /* free up x87 registers */
+ Ain_A87PushPop, /* x87 loads/stores */
+ Ain_A87FpOp, /* x87 operations */
+ Ain_A87LdCW, /* load x87 control word */
//..=20
//.. Xin_FpUnary, /* FP fake unary op */
//.. Xin_FpBinary, /* FP fake binary op */
@@ -383,25 +388,25 @@
//.. Xin_FpLdStI, /* FP fake load/store, converting to/from Int =
*/
//.. Xin_Fp64to32, /* FP round IEEE754 double to IEEE754 single *=
/
//.. Xin_FpCMov, /* FP fake floating point conditional move */
- Ain_LdMXCSR, /* load %mxcsr */
+ Ain_LdMXCSR, /* load %mxcsr */
//.. Xin_FpStSW_AX, /* fstsw %ax */
- Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int
- register */
- Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */
- Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */
- Ain_SseSDSS, /* scalar float32 to/from float64 */
+ Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int
+ register */
+ Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */
+ Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */
+ Ain_SseSDSS, /* scalar float32 to/from float64 */
//..=20
//.. Xin_SseConst, /* Generate restricted SSE literal */
- Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment
- constraints, upper 96/64/0 bits arbitrary */
- Ain_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg *=
/
- Ain_Sse32Fx4, /* SSE binary, 32Fx4 */
- Ain_Sse32FLo, /* SSE binary, 32F in lowest lane only */
- Ain_Sse64Fx2, /* SSE binary, 64Fx2 */
- Ain_Sse64FLo, /* SSE binary, 64F in lowest lane only */
- Ain_SseReRg, /* SSE binary general reg-reg, Re, Rg */
- Ain_SseCMov, /* SSE conditional move */
- Ain_SseShuf /* SSE2 shuffle (pshufd) */
+ Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment
+ constraints, upper 96/64/0 bits arbitrary */
+ Ain_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg=
*/
+ Ain_Sse32Fx4, /* SSE binary, 32Fx4 */
+ Ain_Sse32FLo, /* SSE binary, 32F in lowest lane only */
+ Ain_Sse64Fx2, /* SSE binary, 64Fx2 */
+ Ain_Sse64FLo, /* SSE binary, 64F in lowest lane only */
+ Ain_SseReRg, /* SSE binary general reg-reg, Re, Rg */
+ Ain_SseCMov, /* SSE conditional move */
+ Ain_SseShuf /* SSE2 shuffle (pshufd) */
}
AMD64InstrTag;
=20
@@ -519,47 +524,38 @@
struct {
} MFence;
=20
-//.. /* X86 Floating point (fake 3-operand, "flat reg file" ins=
ns) */
-//.. struct {
-//.. X86FpOp op;
-//.. HReg src;
-//.. HReg dst;
-//.. } FpUnary;
-//.. struct {
-//.. X86FpOp op;
-//.. HReg srcL;
-//.. HReg srcR;
-//.. HReg dst;
-//.. } FpBinary;
-//.. struct {
-//.. Bool isLoad;
-//.. UChar sz; /* only 4 (IEEE single) or 8 (IEEE double=
) */
-//.. HReg reg;
-//.. X86AMode* addr;
-//.. } FpLdSt;
-//.. /* Move 64-bit float to/from memory, converting to/from
-//.. signed int on the way. Note the conversions will obser=
ve
-//.. the host FPU rounding mode currently in force. */
-//.. struct {
-//.. Bool isLoad;
-//.. UChar sz; /* only 2, 4 or 8 */
-//.. HReg reg;
-//.. X86AMode* addr;
-//.. } FpLdStI;
-//.. /* By observing the current FPU rounding mode, round (etc)
-//.. src into dst given that dst should be interpreted as an
-//.. IEEE754 32-bit (float) type. */
-//.. struct {
-//.. HReg src;
-//.. HReg dst;
-//.. } Fp64to32;
-//.. /* Mov src to dst on the given condition, which may not
-//.. be the bogus Xcc_ALWAYS. */
-//.. struct {
-//.. X86CondCode cond;
-//.. HReg src;
-//.. HReg dst;
-//.. } FpCMov;
+ /* --- X87 --- */
+
+ /* A very minimal set of x87 insns, that operate exactly in a
+ stack-like way so no need to think about x87 registers. */
+
+ /* Do 'ffree' on %st(7) .. %st(7-nregs) */
+ struct {
+ Int nregs; /* 1 <=3D nregs <=3D 7 */
+ } A87Free;
+
+ /* Push a 64-bit FP value from memory onto the stack, or move
+ a value from the stack to memory and remove it from the
+ stack. */
+ struct {
+ AMD64AMode* addr;
+ Bool isPush;
+ } A87PushPop;
+
+ /* Do an operation on the top-of-stack. This can be unary, in
+ which case it is %st0 =3D OP( %st0 ), or binary: %st0 =3D OP=
(
+ %st0, %st1 ). */
+ struct {
+ A87FpOp op;
+ } A87FpOp;
+
+ /* Load the FPU control word. */
+ struct {
+ AMD64AMode* addr;
+ } A87LdCW;
+
+ /* --- SSE --- */
+
/* Load 32 bits into %mxcsr. */
struct {
AMD64AMode* addr;
@@ -656,26 +652,30 @@
}
AMD64Instr;
=20
-extern AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst );
-extern AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp, AMD64RMI*, HReg );
-extern AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp, AMD64RI*, AMD64AM=
ode* );
-extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, AMD64RM* dst =
);
-extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, AMD64RM* )=
;
-extern AMD64Instr* AMD64Instr_Test64 ( AMD64RI* src, AMD64RM* dst );
-extern AMD64Instr* AMD64Instr_MulL ( Bool syned, Int sz, AMD64RM* )=
;
-extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* )=
;
+extern AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst );
+extern AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp, AMD64RMI*, HReg )=
;
+extern AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp, AMD64RI*, AMD64A=
Mode* );
+extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, AMD64RM* dst=
);
+extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, AMD64RM* =
);
+extern AMD64Instr* AMD64Instr_Test64 ( AMD64RI* src, AMD64RM* dst );
+extern AMD64Instr* AMD64Instr_MulL ( Bool syned, Int sz, AMD64RM* =
);
+extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* =
);
//.. extern AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp, UInt amt, H=
Reg src, HReg dst );
-extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* );
-extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int );
-extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode cond=
, AMD64RI* dst );
-extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, H=
Reg dst );
-extern AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
- AMD64AMode* src, HReg dst );
-extern AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode=
* dst );
-extern AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst )=
;
-extern AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg ds=
t );
-extern AMD64Instr* AMD64Instr_MFence ( void );
+extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* );
+extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int );
+extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode con=
d, AMD64RI* dst );
+extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, =
HReg dst );
+extern AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
+ AMD64AMode* src, HReg dst );
+extern AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMod=
e* dst );
+extern AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst =
);
+extern AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg d=
st );
+extern AMD64Instr* AMD64Instr_MFence ( void );
+extern AMD64Instr* AMD64Instr_A87Free ( Int nregs );
+extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush=
);
+extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
+extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr );
//..=20
//.. extern AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, H=
Reg dst );
//.. extern AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, =
HReg srcR, HReg dst );
@@ -683,23 +683,23 @@
//.. extern AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, HR=
eg reg, AMD64AMode* );
//.. extern AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst );
//.. extern AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode, HReg src, =
HReg dst );
-extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* );
+extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* );
//.. extern AMD64Instr* AMD64Instr_FpStSW_AX ( void );
-extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, =
HReg dst );
-extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HR=
eg dst );
-extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HR=
eg dst );
-extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg ds=
t );
+extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR,=
HReg dst );
+extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, H=
Reg dst );
+extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, H=
Reg dst );
+extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg d=
st );
//..=20
//.. extern AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst );
-extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD=
64AMode* );
-extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* );
-extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg =
dst );
-extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst =
);
+extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AM=
D64AMode* );
+extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg=
dst );
+extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst=
);
=20
=20
extern void ppAMD64Instr ( AMD64Instr* );
Modified: trunk/priv/host-amd64/isel.c
===================================================================
--- trunk/priv/host-amd64/isel.c 2005-05-07 01:01:24 UTC (rev 1169)
+++ trunk/priv/host-amd64/isel.c 2005-05-08 23:03:48 UTC (rev 1170)
@@ -618,20 +618,19 @@
add_to_rsp(env, 8);
}
=20
-//.. /* Mess with the FPU's rounding mode: set to the default rounding m=
ode
-//.. (DEFAULT_FPUCW). */
-//.. static=20
-//.. void set_FPU_rounding_default ( ISelEnv* env )
-//.. {
-//.. /* pushl $DEFAULT_FPUCW
-//.. fldcw 0(%esp)
-//.. addl $4, %esp=20
-//.. */
-//.. X86AMode* zero_esp =3D X86AMode_IR(0, hregX86_ESP());
-//.. addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
-//.. addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
-//.. add_to_esp(env, 4);
-//.. }
+/* Mess with the FPU's rounding mode: set to the default rounding mode
+ (DEFAULT_FPUCW). */
+static=20
+void set_FPU_rounding_default ( ISelEnv* env )
+{
+ /* movq $DEFAULT_FPUCW, -8(%rsp)
+ fldcw -8(%esp)
+ */
+ AMD64AMode* m8_rsp =3D AMD64AMode_IR(-8, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_Alu64M(
+ Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
+ addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
+}
=20
=20
/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
@@ -669,36 +668,36 @@
}
=20
=20
-//.. /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
-//.. expression denoting a value in the range 0 .. 3, indicating a ro=
und
-//.. mode encoded as per type IRRoundingMode. Set the x87 FPU to hav=
e
-//.. the same rounding.
-//.. */
-//.. static
-//.. void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
-//.. {
-//.. HReg rrm =3D iselIntExpr_R(env, mode);
-//.. HReg rrm2 =3D newVRegI(env);
-//.. X86AMode* zero_esp =3D X86AMode_IR(0, hregX86_ESP());
-//..=20
-//.. /* movl %rrm, %rrm2
-//.. andl $3, %rrm2 -- shouldn't be needed; paranoia
-//.. shll $10, %rrm2
-//.. orl $DEFAULT_FPUCW, %rrm2
-//.. pushl %rrm2
-//.. fldcw 0(%esp)
-//.. addl $4, %esp
-//.. */
-//.. addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
-//.. addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
-//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, X86RM_Reg(rrm2)));
-//.. addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW)=
, rrm2));
-//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
-//.. addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
-//.. add_to_esp(env, 4);
-//.. }
+/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
+ expression denoting a value in the range 0 .. 3, indicating a round
+ mode encoded as per type IRRoundingMode. Set the x87 FPU to have
+ the same rounding.
+*/
+static
+void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
+{
+ HReg rrm =3D iselIntExpr_R(env, mode);
+ HReg rrm2 =3D newVRegI(env);
+ AMD64AMode* m8_rsp =3D AMD64AMode_IR(-8, hregAMD64_RSP());
=20
+ /* movq %rrm, %rrm2
+ andq $3, %rrm2 -- shouldn't be needed; paranoia
+ shlq $10, %rrm2
+ orq $DEFAULT_FPUCW, %rrm2
+ movq %rrm2, -8(%rsp)
+ fldcw -8(%esp)
+ */
+ addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, AMD64RM_Reg(rrm2)));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_OR,=20
+ AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
+ addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,=20
+ AMD64RI_Reg(rrm2), m8_rsp));
+ addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
+}
=20
+
/* Generate !src into a new vector register. Amazing that there isn't
a less crappy way to do this.
*/
@@ -1315,7 +1314,7 @@
sub_from_rsp(env, 16);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rs=
p0));
addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,=20
- AMD64RMI_Mem(rspN), dst ));
+ AMD64RMI_Mem(rspN), dst ));
add_to_rsp(env, 16);
return dst;
}
@@ -2811,26 +2810,74 @@
//.. return res;
//.. }
//.. }
-//..=20
-//.. if (e->tag =3D=3D Iex_Binop && e->Iex.Binop.op =3D=3D Iop_RoundF=
64) {
-//.. HReg rf =3D iselDblExpr(env, e->Iex.Binop.arg2);
-//.. HReg dst =3D newVRegF(env);
-//..=20
-//.. /* rf now holds the value to be rounded. The first thing to =
do
-//.. is set the FPU's rounding mode accordingly. */
-//..=20
-//.. /* Set host rounding mode */
-//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
-//..=20
-//.. /* grndint %rf, %dst */
-//.. addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
-//..=20
-//.. /* Restore default FPU rounding. */
-//.. set_FPU_rounding_default( env );
-//..=20
-//.. return dst;
-//.. }
=20
+ if (e->tag =3D=3D Iex_Binop && e->Iex.Binop.op =3D=3D Iop_RoundF64) {
+ AMD64AMode* m8_rsp =3D AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg =3D iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst =3D newVRegV(env);
+
+ /* rf now holds the value to be rounded. The first thing to do
+ is set the FPU's rounding mode accordingly. */
+
+ /* Set host x87 rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
+ addInstr(env, AMD64Instr_A87Free(1));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+
+ /* Restore default x87 rounding. */
+ set_FPU_rounding_default( env );
+
+ return dst;
+ }
+
+ if (e->tag =3D=3D Iex_Binop=20
+ && (e->Iex.Binop.op =3D=3D Iop_ScaleF64
+ || e->Iex.Binop.op =3D=3D Iop_AtanF64
+ || e->Iex.Binop.op =3D=3D Iop_Yl2xF64)
+ ) {
+ AMD64AMode* m8_rsp =3D AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg1 =3D iselDblExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 =3D iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst =3D newVRegV(env);
+ Bool arg2first =3D toBool(e->Iex.Binop.op =3D=3D Iop_ScaleF64)=
;
+ addInstr(env, AMD64Instr_A87Free(2));
+
+ /* one arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(
+ False/*store*/, 8, arg2first ? arg2 : arg1, m8_rs=
p));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+
+ /* other arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(
+ False/*store*/, 8, arg2first ? arg1 : arg2, m8_rs=
p));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+
+ /* do it */
+ switch (e->Iex.Binop.op) {
+ case Iop_ScaleF64:=20
+ addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
+ break;
+ case Iop_AtanF64:=20
+ addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
+ break;
+ case Iop_Yl2xF64:=20
+ addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
+ break;
+ default:=20
+ vassert(0);
+ }
+
+ /* save result */
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+ return dst;
+ }
+
if (e->tag =3D=3D Iex_Binop && e->Iex.Binop.op =3D=3D Iop_I64toF64) {
HReg dst =3D newVRegV(env);
HReg src =3D iselIntExpr_R(env, e->Iex.Binop.arg2);
@@ -2874,28 +2921,31 @@
return dst;
}
=20
-//.. if (e->tag =3D=3D Iex_Unop) {
-//.. X86FpOp fpop =3D Xfp_INVALID;
-//.. switch (e->Iex.Unop.op) {
+ if (e->tag =3D=3D Iex_Unop) {
+ A87FpOp fpop =3D Afp_INVALID;
+ switch (e->Iex.Unop.op) {
//.. case Iop_NegF64: fpop =3D Xfp_NEG; break;
//.. case Iop_AbsF64: fpop =3D Xfp_ABS; break;
-//.. case Iop_SqrtF64: fpop =3D Xfp_SQRT; break;
-//.. case Iop_SinF64: fpop =3D Xfp_SIN; break;
-//.. case Iop_CosF64: fpop =3D Xfp_COS; break;
+ case Iop_SqrtF64: fpop =3D Afp_SQRT; break;
+ case Iop_SinF64: fpop =3D Afp_SIN; break;
+ case Iop_CosF64: fpop =3D Afp_COS; break;
//.. case Iop_TanF64: fpop =3D Xfp_TAN; break;
-//.. case Iop_2xm1F64: fpop =3D Xfp_2XM1; break;
-//.. default: break;
-//.. }
-//.. if (fpop !=3D Xfp_INVALID) {
-//.. HReg res =3D newVRegF(env);
-//.. HReg src =3D iselDblExpr(env, e->Iex.Unop.arg);
-//.. addInstr(env, X86Instr_FpUnary(fpop,src,res));
-//.. if (fpop !=3D Xfp_SQRT
-//.. && fpop !=3D Xfp_NEG && fpop !=3D Xfp_ABS)
-//.. roundToF64(env, res);
-//.. return res;
-//.. }
-//.. }
+ case Iop_2xm1F64: fpop =3D Afp_2XM1; break;
+ default: break;
+ }
+ if (fpop !=3D Afp_INVALID) {
+ AMD64AMode* m8_rsp =3D AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg =3D iselDblExpr(env, e->Iex.Unop.arg);
+ HReg dst =3D newVRegV(env);
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp=
));
+ addInstr(env, AMD64Instr_A87Free(1));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87FpOp(fpop));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp))=
;
+ return dst;
+ }
+ }
=20
if (e->tag =3D=3D Iex_Unop) {
switch (e->Iex.Unop.op) {
|