|
From: <sv...@va...> - 2006-09-11 11:07:37
|
Author: sewardj
Date: 2006-09-11 12:07:34 +0100 (Mon, 11 Sep 2006)
New Revision: 1655
Log:
Add support for amd64 'fprem' (fixes bug 132918). This isn't exactly
right: the C3/C2/C1/C0 FPU flags are sometimes set differently from
native execution, and I can't figure out why.
Modified:
trunk/priv/guest-amd64/toIR.c
trunk/priv/host-amd64/hdefs.c
trunk/priv/host-amd64/hdefs.h
trunk/priv/host-amd64/isel.c
Modified: trunk/priv/guest-amd64/toIR.c
===================================================================
--- trunk/priv/guest-amd64/toIR.c 2006-08-28 21:31:32 UTC (rev 1654)
+++ trunk/priv/guest-amd64/toIR.c 2006-09-11 11:07:34 UTC (rev 1655)
@@ -4887,20 +4887,28 @@
put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
break;
=20
-//.. case 0xF8: { /* FPREM -- not IEEE compliant */
-//.. IRTemp a1 = newTemp(Ity_F64);
-//.. IRTemp a2 = newTemp(Ity_F64);
-//.. DIP("fprem\n");
-//.. /* Do FPREM twice, once to get the remainder, and once
-//.. to get the C3210 flag values. */
-//.. assign( a1, get_ST(0) );
-//.. assign( a2, get_ST(1) );
-//.. put_ST_UNCHECKED(0, binop(Iop_PRemF64,
-//.. mkexpr(a1), mkexpr(a2)));
-//.. put_C3210( binop(Iop_PRemC3210F64, mkexpr(a1), mkexpr(a2)) );
-//.. break;
-//.. }
-//..=20
+ case 0xF8: { /* FPREM -- not IEEE compliant */
+ IRTemp a1 = newTemp(Ity_F64);
+ IRTemp a2 = newTemp(Ity_F64);
+ DIP("fprem\n");
+ /* Do FPREM twice, once to get the remainder, and once
+ to get the C3210 flag values. */
+ assign( a1, get_ST(0) );
+ assign( a2, get_ST(1) );
+ put_ST_UNCHECKED(0,
+ triop(Iop_PRemF64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)));
+ put_C3210(
+ unop(Iop_32Uto64,
+ triop(Iop_PRemC3210F64,
+ get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ mkexpr(a1),
+ mkexpr(a2)) ));
+ break;
+ }
+
case 0xF9: /* FYL2XP1 */
DIP("fyl2xp1\n");
put_ST_UNCHECKED(1,=20
Modified: trunk/priv/host-amd64/hdefs.c
===================================================================
--- trunk/priv/host-amd64/hdefs.c 2006-08-28 21:31:32 UTC (rev 1654)
+++ trunk/priv/host-amd64/hdefs.c 2006-09-11 11:07:34 UTC (rev 1655)
@@ -548,7 +548,7 @@
case Afp_ATAN: return "atan";
case Afp_YL2X: return "yl2x";
case Afp_YL2XP1: return "yl2xp1";
-//.. case Xfp_PREM: return "prem";
+ case Afp_PREM: return "prem";
//.. case Xfp_PREM1: return "prem1";
case Afp_SQRT: return "sqrt";
//.. case Xfp_ABS: return "abs";
@@ -819,6 +819,13 @@
i->Ain.A87LdCW.addr = addr;
return i;
}
+AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87StSW;
+ i->Ain.A87StSW.addr = addr;
+ return i;
+}
=20
//.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) {
//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
@@ -1155,19 +1162,23 @@
vex_printf("mfence" );
return;
case Ain_A87Free:
- vex_printf("ffree %%st(7..%d)\n", 7 - i->Ain.A87Free.nregs );
+ vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
break;
case Ain_A87PushPop:
vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl ");
ppAMD64AMode(i->Ain.A87PushPop.addr);
break;
case Ain_A87FpOp:
- vex_printf("f%s\n", showA87FpOp(i->Ain.A87FpOp.op));
+ vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
break;
case Ain_A87LdCW:
vex_printf("fldcw ");
ppAMD64AMode(i->Ain.A87LdCW.addr);
break;
+ case Ain_A87StSW:
+ vex_printf("fstsw ");
+ ppAMD64AMode(i->Ain.A87StSW.addr);
+ break;
//.. case Xin_FpUnary:
//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
//.. ppHRegAMD64(i->Xin.FpUnary.src);
@@ -1493,6 +1504,9 @@
case Ain_A87LdCW:
addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
return;
+ case Ain_A87StSW:
+ addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
+ return;
//.. case Xin_FpUnary:
//.. addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
//.. addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
@@ -1704,6 +1718,9 @@
case Ain_A87LdCW:
mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
return;
+ case Ain_A87StSW:
+ mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
+ return;
//.. case Xin_FpUnary:
//.. mapReg(m, &i->Xin.FpUnary.src);
//.. mapReg(m, &i->Xin.FpUnary.dst);
@@ -2804,6 +2821,7 @@
case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
+ case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
default: goto bad;
}
goto done;
@@ -2815,6 +2833,13 @@
p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
goto done;
=20
+ case Ain_A87StSW:
+ *p++ = clearWBit(
+ rexAMode_M(fake(7), i->Ain.A87StSW.addr) );
+ *p++ = 0xDD;
+ p = doAMode_M(p, fake(7)/*subopcode*/, i->Ain.A87StSW.addr);
+ goto done;
+
case Ain_Store:
if (i->Ain.Store.sz == 2) {
/* This just goes to show the crazyness of the instruction
Modified: trunk/priv/host-amd64/hdefs.h
===================================================================
--- trunk/priv/host-amd64/hdefs.h 2006-08-28 21:31:32 UTC (rev 1654)
+++ trunk/priv/host-amd64/hdefs.h 2006-09-11 11:07:34 UTC (rev 1655)
@@ -307,7 +307,7 @@
enum {
Afp_INVALID,
/* Binary */
- Afp_SCALE, Afp_ATAN, Afp_YL2X, Afp_YL2XP1,=20
+ Afp_SCALE, Afp_ATAN, Afp_YL2X, Afp_YL2XP1, Afp_PREM,
/* Unary */
Afp_SQRT,
Afp_SIN, Afp_COS, Afp_TAN,
@@ -386,6 +386,7 @@
Ain_A87PushPop, /* x87 loads/stores */
Ain_A87FpOp, /* x87 operations */
Ain_A87LdCW, /* load x87 control word */
+ Ain_A87StSW, /* store x87 status word */
//..=20
//.. Xin_FpUnary, /* FP fake unary op */
//.. Xin_FpBinary, /* FP fake binary op */
@@ -558,6 +559,11 @@
AMD64AMode* addr;
} A87LdCW;
=20
+ /* Store the FPU status word (fstsw m16) */
+ struct {
+ AMD64AMode* addr;
+ } A87StSW;
+
/* --- SSE --- */
=20
/* Load 32 bits into %mxcsr. */
@@ -680,6 +686,7 @@
extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush );
extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr );
+extern AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr );
//..=20
//.. extern AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst );
//.. extern AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst );
Modified: trunk/priv/host-amd64/isel.c
===================================================================
--- trunk/priv/host-amd64/isel.c 2006-08-28 21:31:32 UTC (rev 1654)
+++ trunk/priv/host-amd64/isel.c 2006-09-11 11:07:34 UTC (rev 1655)
@@ -1636,6 +1636,42 @@
break;
}
=20
+ /* --------- TERNARY OP --------- */
+ case Iex_Triop: {
+ /* C3210 flags following FPU partial remainder (fprem), both
+ IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
+ if (e->Iex.Triop.op == Iop_PRemC3210F64) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
+ HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
+ HReg dst = newVRegI(env);
+ addInstr(env, AMD64Instr_A87Free(2));
+
+ /* one arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+
+ /* other arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+
+ switch (e->Iex.Triop.op) {
+ case Iop_PRemC3210F64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
+ break;
+ default:=20
+ vassert(0);
+ }
+ /* Ignore the result, and instead make off with the FPU's
+ C3210 flags (in the status word). */
+ addInstr(env, AMD64Instr_A87StSW(m8_rsp));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
+ return dst;
+ }
+ break;
+ }
+
default:=20
break;
} /* switch (e->tag) */
@@ -2853,13 +2889,15 @@
&& (e->Iex.Triop.op == Iop_ScaleF64
|| e->Iex.Triop.op == Iop_AtanF64
|| e->Iex.Triop.op == Iop_Yl2xF64
- || e->Iex.Triop.op == Iop_Yl2xp1F64)
+ || e->Iex.Triop.op == Iop_Yl2xp1F64
+ || e->Iex.Triop.op == Iop_PRemF64)
) {
AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
HReg arg1 = iselDblExpr(env, e->Iex.Triop.arg2);
HReg arg2 = iselDblExpr(env, e->Iex.Triop.arg3);
HReg dst = newVRegV(env);
- Bool arg2first = toBool(e->Iex.Triop.op == Iop_ScaleF64);
+ Bool arg2first = toBool(e->Iex.Triop.op == Iop_ScaleF64
+ || e->Iex.Triop.op == Iop_PRemF64);
addInstr(env, AMD64Instr_A87Free(2));
=20
/* one arg -> top of x87 stack */
@@ -2888,6 +2926,9 @@
case Iop_Yl2xp1F64:=20
addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
break;
+ case Iop_PRemF64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
+ break;
default:=20
vassert(0);
}
|