|
From: Christian B. <bor...@de...> - 2010-12-15 14:15:16
|
s390x does not control fp precision via its floating point control register.
Instead, our instructions explicitly work on 32, 64 or 128 bit floats. The
128 bit versions are used for long double, and those numbers are saved in
register pairs (0+2,1+3,4+6,5+7,..,13+15).
This patch adds the necessary Vex Ops and the Ity_F128 type.
lackey and memcheck are also adapted to handle the new types.
---
VEX/priv/ir_defs.c | 115 +++++++++++++++++++++++++++++++++++++++++++-
VEX/priv/ir_opt.c | 2
VEX/pub/libvex_ir.h | 76 ++++++++++++++++++++++++++++-
lackey/lk_main.c | 6 +-
memcheck/mc_translate.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++-
5 files changed, 316 insertions(+), 8 deletions(-)
--- valgrind-upstream.orig/VEX/priv/ir_defs.c
+++ valgrind-upstream/VEX/priv/ir_defs.c
@@ -56,6 +56,7 @@ void ppIRType ( IRType ty )
case Ity_I128: vex_printf( "I128"); break;
case Ity_F32: vex_printf( "F32"); break;
case Ity_F64: vex_printf( "F64"); break;
+ case Ity_F128: vex_printf( "F128"); break;
case Ity_V128: vex_printf( "V128"); break;
default: vex_printf("ty = 0x%x\n", (Int)ty);
vpanic("ppIRType");
@@ -64,7 +65,7 @@ void ppIRType ( IRType ty )
void ppIRConst ( IRConst* con )
{
- union { ULong i64; Double f64; } u;
+ union { ULong i64; Double f64; UInt i32; Float f32; } u;
vassert(sizeof(ULong) == sizeof(Double));
switch (con->tag) {
case Ico_U1: vex_printf( "%d:I1", con->Ico.U1 ? 1 : 0); break;
@@ -72,6 +73,10 @@ void ppIRConst ( IRConst* con )
case Ico_U16: vex_printf( "0x%x:I16", (UInt)(con->Ico.U16)); break;
case Ico_U32: vex_printf( "0x%x:I32", (UInt)(con->Ico.U32)); break;
case Ico_U64: vex_printf( "0x%llx:I64", (ULong)(con->Ico.U64)); break;
+ case Ico_F32: u.f32 = con->Ico.F32;
+ vex_printf( "F32{0x%x}", u.i32);
+ break;
+ case Ico_F32i: vex_printf( "F32i{0x%x}", con->Ico.F32i); break;
case Ico_F64: u.f64 = con->Ico.F64;
vex_printf( "F64{0x%llx}", u.i64);
break;
@@ -240,6 +245,14 @@ void ppIROp ( IROp op )
case Iop_128to64: vex_printf("128to64"); return;
case Iop_64HLto128: vex_printf("64HLto128"); return;
+ case Iop_CmpF32: vex_printf("CmpF32"); return;
+ case Iop_F32toI16S: vex_printf("F32toI16S"); return;
+ case Iop_F32toI32S: vex_printf("F32toI32S"); return;
+ case Iop_F32toI64S: vex_printf("F32toI64S"); return;
+ case Iop_I16StoF32: vex_printf("I16StoF32"); return;
+ case Iop_I32StoF32: vex_printf("I32StoF32"); return;
+ case Iop_I64StoF32: vex_printf("I64StoF32"); return;
+
case Iop_AddF64: vex_printf("AddF64"); return;
case Iop_SubF64: vex_printf("SubF64"); return;
case Iop_MulF64: vex_printf("MulF64"); return;
@@ -253,6 +266,32 @@ void ppIROp ( IROp op )
case Iop_MulF32: vex_printf("MulF32"); return;
case Iop_DivF32: vex_printf("DivF32"); return;
+ /* 128 bit floating point */
+ case Iop_AddF128: vex_printf("AddF128"); return;
+ case Iop_SubF128: vex_printf("SubF128"); return;
+ case Iop_MulF128: vex_printf("MulF128"); return;
+ case Iop_DivF128: vex_printf("DivF128"); return;
+ case Iop_AbsF128: vex_printf("AbsF128"); return;
+ case Iop_NegF128: vex_printf("NegF128"); return;
+ case Iop_SqrtF128: vex_printf("SqrtF128"); return;
+ case Iop_CmpF128: vex_printf("CmpF128"); return;
+
+ case Iop_F64HLto128: vex_printf("F64HLto128"); return;
+ case Iop_F128HIto64: vex_printf("F128HIto64"); return;
+ case Iop_F128to64: vex_printf("F128to64"); return;
+ case Iop_I32StoF128: vex_printf("I32StoF128"); return;
+ case Iop_I64StoF128: vex_printf("I64StoF128"); return;
+ case Iop_F128toI32S: vex_printf("F128toI32S"); return;
+ case Iop_F128toI64S: vex_printf("F128toI64S"); return;
+ case Iop_F32toF128: vex_printf("F32toF128"); return;
+ case Iop_F64toF128: vex_printf("F64toF128"); return;
+ case Iop_F128toF64: vex_printf("F128toF64"); return;
+ case Iop_F128toF32: vex_printf("F128toF32"); return;
+
+ /* s390 specific */
+ case Iop_MAddF32: vex_printf("s390_MAddF32"); return;
+ case Iop_MSubF32: vex_printf("s390_MSubF32"); return;
+
case Iop_ScaleF64: vex_printf("ScaleF64"); return;
case Iop_AtanF64: vex_printf("AtanF64"); return;
case Iop_Yl2xF64: vex_printf("Yl2xF64"); return;
@@ -1260,6 +1299,20 @@ IRConst* IRConst_U64 ( ULong u64 )
c->Ico.U64 = u64;
return c;
}
+IRConst* IRConst_F32 ( Float f32 )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_F32;
+ c->Ico.F32 = f32;
+ return c;
+}
+IRConst* IRConst_F32i ( UInt f32i )
+{
+ IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+ c->tag = Ico_F32i;
+ c->Ico.F32i = f32i;
+ return c;
+}
IRConst* IRConst_F64 ( Double f64 )
{
IRConst* c = LibVEX_Alloc(sizeof(IRConst));
@@ -1708,6 +1761,8 @@ IRConst* deepCopyIRConst ( IRConst* c )
case Ico_U16: return IRConst_U16(c->Ico.U16);
case Ico_U32: return IRConst_U32(c->Ico.U32);
case Ico_U64: return IRConst_U64(c->Ico.U64);
+ case Ico_F32: return IRConst_F32(c->Ico.F32);
+ case Ico_F32i: return IRConst_F32i(c->Ico.F32i);
case Ico_F64: return IRConst_F64(c->Ico.F64);
case Ico_F64i: return IRConst_F64i(c->Ico.F64i);
case Ico_V128: return IRConst_V128(c->Ico.V128);
@@ -2189,9 +2244,15 @@ void typeOfPrimop ( IROp op,
case Iop_RoundF32toInt:
BINARY(ity_RMode,Ity_F32, Ity_F32);
+ case Iop_CmpF32:
+ BINARY(Ity_F32,Ity_F32, Ity_I32);
+
case Iop_CmpF64:
BINARY(Ity_F64,Ity_F64, Ity_I32);
+ case Iop_CmpF128:
+ BINARY(Ity_F128,Ity_F128, Ity_I32);
+
case Iop_F64toI16S: BINARY(ity_RMode,Ity_F64, Ity_I16);
case Iop_F64toI32S: BINARY(ity_RMode,Ity_F64, Ity_I32);
case Iop_F64toI64S: BINARY(ity_RMode,Ity_F64, Ity_I64);
@@ -2204,6 +2265,14 @@ void typeOfPrimop ( IROp op,
case Iop_I32UtoF64: UNARY(Ity_I32, Ity_F64);
+ case Iop_F32toI16S: BINARY(ity_RMode,Ity_F32, Ity_I16);
+ case Iop_F32toI32S: BINARY(ity_RMode,Ity_F32, Ity_I32);
+ case Iop_F32toI64S: BINARY(ity_RMode,Ity_F32, Ity_I64);
+
+ case Iop_I16StoF32: UNARY(Ity_I16, Ity_F32);
+ case Iop_I32StoF32: BINARY(ity_RMode,Ity_I32, Ity_F32);
+ case Iop_I64StoF32: BINARY(ity_RMode,Ity_I64, Ity_F32);
+
case Iop_F32toF64: UNARY(Ity_F32, Ity_F64);
case Iop_F64toF32: BINARY(ity_RMode,Ity_F64, Ity_F32);
@@ -2443,6 +2512,43 @@ void typeOfPrimop ( IROp op,
case Iop_QDMulLong16Sx4: case Iop_QDMulLong32Sx2:
BINARY(Ity_I64, Ity_I64, Ity_V128);
+ /* s390 specific */
+ case Iop_MAddF32:
+ case Iop_MSubF32:
+ QUATERNARY(ity_RMode,Ity_F32,Ity_F32,Ity_F32, Ity_F32);
+
+ case Iop_F64HLto128:
+ BINARY(Ity_F64,Ity_F64, Ity_F128);
+
+ case Iop_F128HIto64:
+ case Iop_F128to64:
+ UNARY(Ity_F128, Ity_F64);
+
+ case Iop_AddF128:
+ case Iop_SubF128:
+ case Iop_MulF128:
+ case Iop_DivF128:
+ TERNARY(ity_RMode,Ity_F128,Ity_F128, Ity_F128);
+
+ case Iop_NegF128:
+ case Iop_AbsF128:
+ UNARY(Ity_F128, Ity_F128);
+
+ case Iop_SqrtF128:
+ BINARY(ity_RMode,Ity_F128, Ity_F128);
+
+ case Iop_I32StoF128: UNARY(Ity_I32, Ity_F128);
+ case Iop_I64StoF128: UNARY(Ity_I64, Ity_F128);
+
+ case Iop_F128toI32S: BINARY(ity_RMode,Ity_F128, Ity_I32);
+ case Iop_F128toI64S: BINARY(ity_RMode,Ity_F128, Ity_I64);
+
+ case Iop_F32toF128: UNARY(Ity_F32, Ity_F128);
+ case Iop_F64toF128: UNARY(Ity_F64, Ity_F128);
+
+ case Iop_F128toF32: BINARY(ity_RMode,Ity_F128, Ity_F32);
+ case Iop_F128toF64: BINARY(ity_RMode,Ity_F128, Ity_F64);
+
default:
ppIROp(op);
vpanic("typeOfPrimop");
@@ -2525,6 +2631,8 @@ IRType typeOfIRConst ( IRConst* con )
case Ico_U16: return Ity_I16;
case Ico_U32: return Ity_I32;
case Ico_U64: return Ity_I64;
+ case Ico_F32: return Ity_F32;
+ case Ico_F32i: return Ity_F32;
case Ico_F64: return Ity_F64;
case Ico_F64i: return Ity_F64;
case Ico_V128: return Ity_V128;
@@ -2584,7 +2692,7 @@ Bool isPlausibleIRType ( IRType ty )
case Ity_INVALID: case Ity_I1:
case Ity_I8: case Ity_I16: case Ity_I32:
case Ity_I64: case Ity_I128:
- case Ity_F32: case Ity_F64:
+ case Ity_F32: case Ity_F64: case Ity_F128:
case Ity_V128:
return True;
default:
@@ -3420,6 +3528,8 @@ Bool eqIRConst ( IRConst* c1, IRConst* c
case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
+ case Ico_F32: return toBool( c1->Ico.F32 == c2->Ico.F32 );
+ case Ico_F32i: return toBool( c1->Ico.F32i == c2->Ico.F32i );
case Ico_F64: return toBool( c1->Ico.F64 == c2->Ico.F64 );
case Ico_F64i: return toBool( c1->Ico.F64i == c2->Ico.F64i );
case Ico_V128: return toBool( c1->Ico.V128 == c2->Ico.V128 );
@@ -3444,6 +3554,7 @@ Int sizeofIRType ( IRType ty )
case Ity_I128: return 16;
case Ity_F32: return 4;
case Ity_F64: return 8;
+ case Ity_F128: return 16;
case Ity_V128: return 16;
default: vex_printf("\n"); ppIRType(ty); vex_printf("\n");
vpanic("sizeofIRType");
--- valgrind-upstream.orig/VEX/priv/ir_opt.c
+++ valgrind-upstream/VEX/priv/ir_opt.c
@@ -4506,7 +4506,7 @@ static void considerExpensives ( /*OUT*/
case Ity_I1: case Ity_I8: case Ity_I16:
case Ity_I32: case Ity_I64: case Ity_I128:
break;
- case Ity_F32: case Ity_F64: case Ity_V128:
+ case Ity_F32: case Ity_F64: case Ity_F128: case Ity_V128:
*hasVorFtemps = True;
break;
default:
--- valgrind-upstream.orig/VEX/pub/libvex_ir.h
+++ valgrind-upstream/VEX/pub/libvex_ir.h
@@ -227,6 +227,7 @@ typedef
Ity_I128, /* 128-bit scalar */
Ity_F32, /* IEEE 754 float */
Ity_F64, /* IEEE 754 double */
+ Ity_F128, /* 128-bit floating point; implementation defined */
Ity_V128 /* 128-bit SIMD */
}
IRType;
@@ -261,6 +262,9 @@ typedef
Ico_U16,
Ico_U32,
Ico_U64,
+ Ico_F32, /* 32-bit IEEE754 floating */
+ Ico_F32i, /* 32-bit unsigned int to be interpreted literally
+ as a IEEE754 single value. */
Ico_F64, /* 64-bit IEEE754 floating */
Ico_F64i, /* 64-bit unsigned int to be interpreted literally
as a IEEE754 double value. */
@@ -282,6 +286,8 @@ typedef
UShort U16;
UInt U32;
ULong U64;
+ Float F32;
+ UInt F32i;
Double F64;
ULong F64i;
UShort V128; /* 16-bit value; see Ico_V128 comment above */
@@ -295,6 +301,8 @@ extern IRConst* IRConst_U8 ( UChar );
extern IRConst* IRConst_U16 ( UShort );
extern IRConst* IRConst_U32 ( UInt );
extern IRConst* IRConst_U64 ( ULong );
+extern IRConst* IRConst_F32 ( Float );
+extern IRConst* IRConst_F32i ( UInt );
extern IRConst* IRConst_F64 ( Double );
extern IRConst* IRConst_F64i ( ULong );
extern IRConst* IRConst_V128 ( UShort );
@@ -556,6 +564,26 @@ typedef
/* :: F64 x F64 -> IRCmpF64Result(I32) */
Iop_CmpF64,
+ /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
+ 0x45 Unordered
+ 0x01 LT
+ 0x00 GT
+ 0x40 EQ
+ This just happens to be the Intel encoding. The values
+ are recorded in the type IRCmpF32Result.
+ */
+ Iop_CmpF32,
+
+ /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
+ 0x45 Unordered
+ 0x01 LT
+ 0x00 GT
+ 0x40 EQ
+ This just happens to be the Intel encoding. The values
+ are recorded in the type IRCmpF128Result.
+ */
+ Iop_CmpF128,
+
/* --- Int to/from FP conversions. --- */
/* For the most part, these take a first argument :: Ity_I32 (as
@@ -609,6 +637,14 @@ typedef
Iop_I32UtoF64, /* unsigned I32 -> F64 */
+ Iop_F32toI16S, /* IRRoundingMode(I32) x F32 -> signed I16 */
+ Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
+ Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */
+
+ Iop_I16StoF32, /* signed I16 -> F32 */
+ Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
+ Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */
+
/* Conversion between floating point formats */
Iop_F32toF64, /* F32 -> F64 */
Iop_F64toF32, /* IRRoundingMode(I32) x F64 -> F32 */
@@ -618,6 +654,30 @@ typedef
Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
+ /* Support for 128-bit floating point */
+ Iop_F64HLto128, // :: (high half of F128, low half of F128) -> F128
+ Iop_F128HIto64, // :: F128 -> high half of F128 (result is an F64)
+ Iop_F128to64, // :: F128 -> low half of F128 (result is an F64)
+
+ /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
+ Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
+
+ /* :: F128 -> F128 */
+ Iop_NegF128, Iop_AbsF128,
+
+ /* :: IRRoundingMode(I32) x F128 -> F128 */
+ Iop_SqrtF128,
+
+ Iop_I32StoF128, /* signed I32 -> F128 */
+ Iop_I64StoF128, /* signed I64 -> F128 */
+ Iop_F32toF128, /* F32 -> F128 */
+ Iop_F64toF128, /* F64 -> F128 */
+
+ Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32 */
+ Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64 */
+ Iop_F128toF64, /* IRRoundingMode(I32) x F128 -> F64 */
+ Iop_F128toF32, /* IRRoundingMode(I32) x F128 -> F32 */
+
/* --- guest x86/amd64 specifics, not mandated by 754. --- */
/* Binary ops, with rounding. */
@@ -645,11 +705,19 @@ typedef
Iop_RoundF32toInt, /* F32 value to nearest integral value (still
as F32) */
+ /* --- guest s390 specifics, not mandated by 754. --- */
+
+ /* Fused multiply-add/sub */
+ /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
+ (computes op3 * op2 ± op1) */
+ Iop_MAddF32, Iop_MSubF32,
+
/* --- guest ppc32/64 specifics, not mandated by 754. --- */
/* Ternary operations, with rounding. */
/* Fused multiply-add/sub, with 112-bit intermediate
- precision */
+ precision for ppc.
+ Also used to implement fused multiply-add/sub for s390. */
/* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
(computes arg2 * arg3 ± arg4) */
Iop_MAddF64, Iop_MSubF64,
@@ -1216,7 +1284,9 @@ typedef
Irrm_NEAREST = 0,
Irrm_NegINF = 1,
Irrm_PosINF = 2,
- Irrm_ZERO = 3
+ Irrm_ZERO = 3,
+ Irrm_NEAREST_AWAY = 4,
+ Irrm_CURRENT = 5
}
IRRoundingMode;
@@ -1231,6 +1301,8 @@ typedef
}
IRCmpF64Result;
+typedef IRCmpF64Result IRCmpF32Result;
+typedef IRCmpF64Result IRCmpF128Result;
/* ------------------ Expressions ------------------ */
--- valgrind-upstream.orig/lackey/lk_main.c
+++ valgrind-upstream/lackey/lk_main.c
@@ -314,7 +314,8 @@ static Int type2index ( IRType ty )
case Ity_I128: return 5;
case Ity_F32: return 6;
case Ity_F64: return 7;
- case Ity_V128: return 8;
+ case Ity_F128: return 8;
+ case Ity_V128: return 9;
default: tl_assert(0);
}
}
@@ -330,7 +331,8 @@ static HChar* nameOfTypeIndex ( Int i )
case 5: return "I128"; break;
case 6: return "F32"; break;
case 7: return "F64"; break;
- case 8: return "V128"; break;
+ case 8: return "F128"; break;
+ case 9: return "V128"; break;
default: tl_assert(0);
}
}
--- valgrind-upstream.orig/memcheck/mc_translate.c
+++ valgrind-upstream/memcheck/mc_translate.c
@@ -121,6 +121,7 @@ static IRType shadowTypeV ( IRType ty )
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
+static IRExpr *i128_const_zero(void);
/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management. ---*/
@@ -343,7 +344,7 @@ static Bool sameKindedAtoms ( IRAtom* a1
/* Shadow state is always accessed using integer types. This returns
an integer type with the same size (as per sizeofIRType) as the
given type. The only valid shadow types are Bit, I8, I16, I32,
- I64, V128. */
+ I64, I128, V128. */
static IRType shadowTypeV ( IRType ty )
{
@@ -356,6 +357,7 @@ static IRType shadowTypeV ( IRType ty )
case Ity_I128: return ty;
case Ity_F32: return Ity_I32;
case Ity_F64: return Ity_I64;
+ case Ity_F128: return Ity_I128;
case Ity_V128: return Ity_V128;
default: ppIRType(ty);
VG_(tool_panic)("memcheck:shadowTypeV");
@@ -371,6 +373,7 @@ static IRExpr* definedOfType ( IRType ty
case Ity_I16: return IRExpr_Const(IRConst_U16(0));
case Ity_I32: return IRExpr_Const(IRConst_U32(0));
case Ity_I64: return IRExpr_Const(IRConst_U64(0));
+ case Ity_I128: return i128_const_zero();
case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
default: VG_(tool_panic)("memcheck:definedOfType");
}
@@ -438,6 +441,18 @@ static IRAtom* assignNew ( HChar cat, MC
/*------------------------------------------------------------*/
+/*--- Helper functions for 128-bit ops ---*/
+/*------------------------------------------------------------*/
+static IRExpr *i128_const_zero(void)
+{
+ return binop(Iop_64HLto128, IRExpr_Const(IRConst_U64(0)),
+ IRExpr_Const(IRConst_U64(0)));
+}
+
+/* There are no 128-bit loads and/or stores. So we do not need to worry
+ about that in expr2vbits_Load */
+
+/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops ---*/
/*------------------------------------------------------------*/
@@ -499,6 +514,20 @@ static IRAtom* mkUifU64 ( MCEnv* mce, IR
return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}
+static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
+ IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
+ tl_assert(isShadowAtom(mce,a1));
+ tl_assert(isShadowAtom(mce,a2));
+ tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
+ tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
+ tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
+ tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
+ tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
+ tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
+
+ return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
+}
+
static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
tl_assert(isShadowAtom(mce,a1));
tl_assert(isShadowAtom(mce,a2));
@@ -511,6 +540,7 @@ static IRAtom* mkUifU ( MCEnv* mce, IRTy
case Ity_I16: return mkUifU16(mce, a1, a2);
case Ity_I32: return mkUifU32(mce, a1, a2);
case Ity_I64: return mkUifU64(mce, a1, a2);
+ case Ity_I128: return mkUifU128(mce, a1, a2);
case Ity_V128: return mkUifUV128(mce, a1, a2);
default:
VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
@@ -650,6 +680,10 @@ static IRAtom* mkImproveORV128 ( MCEnv*
/* --------- Pessimising casts. --------- */
+/* The function returns an expression of type DST_TY. If any of the VBITS
+ is undefined (value == 1) the resulting expression has all bits set to
+ 1. Otherwise, all bits are 0. */
+
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
IRType src_ty;
@@ -1202,6 +1236,7 @@ void do_shadow_PUT ( MCEnv* mce, Int of
ty = typeOfIRExpr(mce->sb->tyenv, vatom);
tl_assert(ty != Ity_I1);
+ tl_assert(ty != Ity_I128);
if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
/* later: no ... */
/* emit code to emit a complaint if any of the vbits are 1. */
@@ -1263,6 +1298,7 @@ IRExpr* shadow_GET ( MCEnv* mce, Int off
{
IRType tyS = shadowTypeV(ty);
tl_assert(ty != Ity_I1);
+ tl_assert(ty != Ity_I128);
if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
/* Always defined, return all zeroes of the relevant type */
return definedOfType(tyS);
@@ -1414,6 +1450,22 @@ IRAtom* mkLazy3 ( MCEnv* mce, IRType fin
return at;
}
+ /* I32 x I128 x I128 -> I128 */
+ /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
+ if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
+ && finalVty == Ity_I128) {
+ if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
+ /* Widen 1st arg to I128. Since 1st arg is typically a rounding
+ mode indication which is fully defined, this should get
+ folded out later. */
+ at = mkPCastTo(mce, Ity_I128, va1);
+ /* Now fold in 2nd and 3rd args. */
+ at = mkUifU(mce, Ity_I128, at, va2);
+ at = mkUifU(mce, Ity_I128, at, va3);
+ /* and PCast once again. */
+ at = mkPCastTo(mce, Ity_I128, at);
+ return at;
+ }
if (1) {
VG_(printf)("mkLazy3: ");
ppIRType(t1);
@@ -1474,6 +1526,19 @@ IRAtom* mkLazy4 ( MCEnv* mce, IRType fin
at = mkPCastTo(mce, Ity_I64, at);
return at;
}
+ /* I32 x I32 x I32 x I32 -> I32 */
+ /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
+ if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
+ && finalVty == Ity_I32) {
+ if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
+ at = va1;
+ /* Now fold in 2nd, 3rd, 4th args. */
+ at = mkUifU(mce, Ity_I32, at, va2);
+ at = mkUifU(mce, Ity_I32, at, va3);
+ at = mkUifU(mce, Ity_I32, at, va4);
+ at = mkPCastTo(mce, Ity_I32, at);
+ return at;
+ }
if (1) {
VG_(printf)("mkLazy4: ");
@@ -2136,6 +2201,12 @@ IRAtom* expr2vbits_Qop ( MCEnv* mce,
case Iop_MSubF64r32:
/* I32(rm) x F64 x F64 x F64 -> F64 */
return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
+
+ case Iop_MAddF32:
+ case Iop_MSubF32:
+ /* I32(rm) x F32 x F32 x F32 -> F32 */
+ return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
+
default:
ppIROp(op);
VG_(tool_panic)("memcheck:expr2vbits_Qop");
@@ -2162,6 +2233,12 @@ IRAtom* expr2vbits_Triop ( MCEnv* mce,
tl_assert(sameKindedAtoms(atom2,vatom2));
tl_assert(sameKindedAtoms(atom3,vatom3));
switch (op) {
+ case Iop_AddF128:
+ case Iop_SubF128:
+ case Iop_MulF128:
+ case Iop_DivF128:
+ /* I32(rm) x F128 x F128 -> F128 */
+ return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
case Iop_AddF64:
case Iop_AddF64r32:
case Iop_SubF64:
@@ -2847,6 +2924,14 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,
/* Scalar floating point */
+ case Iop_F32toI64S:
+ /* I32(rm) x F32 -> I64 */
+ return mkLazy2(mce, Ity_I64, vatom1, vatom2);
+
+ case Iop_I64StoF32:
+ /* I32(rm) x I64 -> F32 */
+ return mkLazy2(mce, Ity_I32, vatom1, vatom2);
+
case Iop_RoundF64toInt:
case Iop_RoundF64toF32:
case Iop_F64toI64S:
@@ -2864,6 +2949,26 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,
/* I32(rm) x I32/F32 -> I32/F32 */
return mkLazy2(mce, Ity_I32, vatom1, vatom2);
+ case Iop_SqrtF128:
+ /* I32(rm) x F128 -> F128 */
+ return mkLazy2(mce, Ity_I128, vatom1, vatom2);
+
+ case Iop_I32StoF32:
+ case Iop_F32toI32S:
+ /* First arg is I32 (rounding mode), second is F32/I32 (data). */
+ return mkLazy2(mce, Ity_I32, vatom1, vatom2);
+
+ case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
+ case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
+ return mkLazy2(mce, Ity_I32, vatom1, vatom2);
+
+ case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
+ case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
+ return mkLazy2(mce, Ity_I64, vatom1, vatom2);
+
+ case Iop_F64HLto128:
+ return mkLazy2(mce, Ity_I128, vatom1, vatom2);
+
case Iop_F64toI32U:
case Iop_F64toI32S:
case Iop_F64toF32:
@@ -2874,7 +2979,9 @@ IRAtom* expr2vbits_Binop ( MCEnv* mce,
/* First arg is I32 (rounding mode), second is F64 (data). */
return mkLazy2(mce, Ity_I16, vatom1, vatom2);
+ case Iop_CmpF32:
case Iop_CmpF64:
+ case Iop_CmpF128:
return mkLazy2(mce, Ity_I32, vatom1, vatom2);
/* non-FP after here */
@@ -3143,6 +3250,20 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IR
case Iop_Reverse64_32x4:
return assignNew('V', mce, Ity_V128, unop(op, vatom));
+ case Iop_F128HIto64: /* F128 -> high half of F128 */
+ case Iop_F128to64: /* F128 -> low half of F128 */
+ return mkPCastTo(mce, Ity_I64, vatom);
+
+ case Iop_NegF128:
+ case Iop_AbsF128:
+ return mkPCastTo(mce, Ity_I128, vatom);
+
+ case Iop_I32StoF128: /* signed I32 -> F128 */
+ case Iop_I64StoF128: /* signed I64 -> F128 */
+ case Iop_F32toF128: /* F32 -> F128 */
+ case Iop_F64toF128: /* F64 -> F128 */
+ return mkPCastTo(mce, Ity_I128, vatom);
+
case Iop_F32toF64:
case Iop_I32StoF64:
case Iop_I32UtoF64:
@@ -3186,6 +3307,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IR
case Iop_Reverse64_32x2:
return assignNew('V', mce, Ity_I64, unop(op, vatom));
+ case Iop_I16StoF32:
case Iop_64to32:
case Iop_64HIto32:
case Iop_1Uto32:
@@ -4537,6 +4659,7 @@ static Bool isBogusAtom ( IRAtom* at )
case Ico_U32: n = (ULong)con->Ico.U32; break;
case Ico_U64: n = (ULong)con->Ico.U64; break;
case Ico_F64: return False;
+ case Ico_F32i: return False;
case Ico_F64i: return False;
case Ico_V128: return False;
default: ppIRExpr(at); tl_assert(0);
|