|
From: Thomas R. <tr...@st...> - 2011-11-02 17:05:39
|
Hi *,
Due to us using valgrind in an exam environment[1] with CGAL[2], I am
in the unfortunate position that I have to get FP rounding to work
without restrictions.
Since it's bad enough that we (or to be precise, my future successors)
have to maintain a valgrind module, I would like to put in some work
time to get this upstreamed. I am thus asking for your guidance on
what approach you would want to be taken.
What I did so far: I hacked the VEX amd64 backend to emit rounding
code in every place not already covered. Specifically, it loads the
rounding mode from the appropriate register before doing the FP
operation(s), then reverts back to the default. The patch to do this
is at the end, and also at
git://csa.inf.ethz.ch/VEX master
This works in my testing, but it's probably not what you had in mind.
From my end it looks as if the current idea is to put an IRExpr for
the rounding mode field of every affected instruction, and then
load/restore that IRExpr on demand without looking at it again. Is
that correct?
If so, I think that could be done, although it's just a slightly
weird(er) way to achieve the same.
OTOH things could be done more efficiently if we could do away with
the restriction that the rounding mode must be "nearest" except in
little blocks around client instructions. That is, I'm interested in
the limitations explained in the comment:
/* Vex-generated code expects to run with the FPU set as follows: all
exceptions masked, round-to-nearest, precision = 53 bits. This
corresponds to a FPU control word value of 0x027F.
Similarly the SSE control word (%mxcsr) should be 0x1F80.
%fpucw and %mxcsr should have these values on entry to
Vex-generated code, and should those values should be
unchanged at exit.
*/
What parts of the code assume this? How hard is it to lift this
limitation?
If that's possible, my idea would be to have a special "current
rounding mode" value in the rounding field. Then instructions that do
not specify a rounding mode can be translated without changing the
flags at all. OTOH instructions that do specify rounding would need
to save and restore the value in effect at that point.
On a related note, it seems to me that the choice of 387 or SSE
instructions at the VEX backend is not correlated with instructions
parsed by the front end. Is that correct? If so, maybe there should
instead be two magic "current 387/SSE rounding" flags so as to allow
the code generator to know which one to use.
Thanks,
Thomas
[1] http://sourceforge.net/mailarchive/forum.php?thread_name=201012141040.41546.trast%40student.ethz.ch&forum_name=valgrind-developers
[2] http://www.cgal.org/ -- parts rely on round-to-infinity behavior
diff --git c/priv/host_amd64_isel.c w/priv/host_amd64_isel.c
index bcd213f..93bce58 100644
--- c/priv/host_amd64_isel.c
+++ w/priv/host_amd64_isel.c
@@ -33,6 +33,7 @@
without prior written permission.
*/
+#include "libvex_guest_amd64.h"
#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
@@ -45,7 +46,6 @@
#include "host_generic_simd128.h"
#include "host_amd64_defs.h"
-
/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff ---*/
/*---------------------------------------------------------*/
@@ -727,6 +727,33 @@ void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
add_to_rsp(env, 8);
}
+#define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND)
+#define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
+# define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
+
+static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
+{
+ return
+ unop( Iop_64to32,
+ binop( Iop_And64,
+ IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
+ mkU64(3) ));
+}
+
+static IRExpr* /* :: Ity_I32 */ get_fpu_roundingmode ( void )
+{
+ return
+ unop( Iop_64to32,
+ binop( Iop_And64,
+ IRExpr_Get( OFFB_FPROUND, Ity_I64 ),
+ mkU64(3) ));
+}
+
+static
+void set_SSE_rounding_mode_from_guest ( ISelEnv* env )
+{
+ set_SSE_rounding_mode(env, get_sse_roundingmode());
+}
/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
expression denoting a value in the range 0 .. 3, indicating a round
@@ -757,6 +784,12 @@ void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}
+static
+void set_FPU_rounding_mode_from_guest ( ISelEnv* env )
+{
+ set_FPU_rounding_mode(env, get_fpu_roundingmode());
+}
+
/* Generate all-zeroes into a new vector register.
*/
@@ -3004,9 +3037,9 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
addInstr(env, mk_vMOVsd_RR(argL, dst));
- /* XXXROUNDINGFIXME */
- /* set roundingmode here */
+ set_SSE_rounding_mode_from_guest(env);
addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
+ set_SSE_rounding_default(env);
return dst;
}
}
@@ -3062,9 +3095,8 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
- /* do it */
- /* XXXROUNDINGFIXME */
- /* set roundingmode here */
+ set_FPU_rounding_mode_from_guest(env);
+
switch (e->Iex.Triop.op) {
case Iop_ScaleF64:
addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
@@ -3091,6 +3123,9 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
/* save result */
addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+
+ set_FPU_rounding_default(env);
+
return dst;
}
@@ -3157,6 +3192,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* XXXROUNDINGFIXME */
/* set roundingmode here */
+ set_FPU_rounding_mode_from_guest(env);
addInstr(env, AMD64Instr_A87FpOp(fpop));
if (e->Iex.Binop.op==Iop_TanF64) {
/* get rid of the extra 1.0 that fptan pushes */
@@ -3164,6 +3200,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
}
addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+ set_FPU_rounding_default(env);
return dst;
}
}
@@ -3486,7 +3523,9 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegV(env);
addInstr(env, mk_vMOVsd_RR(argL, dst));
+ set_SSE_rounding_mode_from_guest(env);
addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
+ set_SSE_rounding_default(env);
return dst;
}
@@ -3506,7 +3545,9 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegV(env);
addInstr(env, mk_vMOVsd_RR(argL, dst));
+ set_SSE_rounding_mode_from_guest(env);
addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
+ set_SSE_rounding_default(env);
return dst;
}
@@ -3525,7 +3566,9 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegV(env);
addInstr(env, mk_vMOVsd_RR(argL, dst));
+ set_SSE_rounding_mode_from_guest(env);
addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
+ set_SSE_rounding_default(env);
return dst;
}
@@ -3544,7 +3587,9 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
HReg dst = newVRegV(env);
addInstr(env, mk_vMOVsd_RR(argL, dst));
+ set_SSE_rounding_mode_from_guest(env);
addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
+ set_SSE_rounding_default(env);
return dst;
}
--
Thomas Rast
trast@{inf,student}.ethz.ch
|