You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
1
(2) |
2
(2) |
3
|
4
(1) |
5
|
|
6
|
7
(2) |
8
(6) |
9
(2) |
10
|
11
|
12
|
|
13
|
14
|
15
(2) |
16
|
17
(1) |
18
(1) |
19
|
|
20
(1) |
21
(1) |
22
(1) |
23
|
24
|
25
|
26
|
|
27
|
28
|
29
|
30
(1) |
31
|
|
|
|
From: Mark W. <ma...@so...> - 2020-12-15 15:36:08
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=04cdc29b007594a0e58ffef0c9dd87df3ea595ea commit 04cdc29b007594a0e58ffef0c9dd87df3ea595ea Author: Mark Wielaard <ma...@kl...> Date: Wed Oct 14 06:11:34 2020 -0400 arm64 VEX frontend and backend support for Iop_M{Add,Sub}F{32,64} The arm64 frontend used to implement the scalar fmadd, fmsub, fnmadd and fnmsub instructions into separate addition/subtraction and multiplication instructions, which caused rounding issues. This patch turns them into Iop_M{Add,Sub}F{32,64} instructions (with some arguments negated). And the backend now emits fmadd or fmsub instructions. Alexandra Hajkova <aha...@re...> added tests and fixed up the implementation to make sure rounding (and sign) are correct now. https://bugs.kde.org/show_bug.cgi?id=426014 Diff: --- VEX/priv/guest_arm64_toIR.c | 58 +++++++++++---- VEX/priv/host_arm64_defs.c | 136 +++++++++++++++++++++++++++++++++- VEX/priv/host_arm64_defs.h | 30 ++++++++ VEX/priv/host_arm64_isel.c | 39 ++++++++++ none/tests/arm64/Makefile.am | 6 +- none/tests/arm64/fmadd_sub.c | 98 ++++++++++++++++++++++++ none/tests/arm64/fmadd_sub.stderr.exp | 0 none/tests/arm64/fmadd_sub.stdout.exp | 125 +++++++++++++++++++++++++++++++ none/tests/arm64/fmadd_sub.vgtest | 3 + 9 files changed, 479 insertions(+), 16 deletions(-) diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c index 556b85a6af..d242d43c07 100644 --- a/VEX/priv/guest_arm64_toIR.c +++ b/VEX/priv/guest_arm64_toIR.c @@ -286,6 +286,12 @@ static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 ) return IRExpr_Triop(op, a1, a2, a3); } +static IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2, + IRExpr* a3, IRExpr* a4 ) +{ + return IRExpr_Qop(op, a1, a2, a3, a4); +} + static IRExpr* loadLE ( IRType ty, IRExpr* addr ) { return IRExpr_Load(Iend_LE, ty, addr); @@ -532,6 +538,22 @@ static IROp mkADDF ( IRType ty ) { } } +static IROp mkFMADDF ( IRType ty ) { + switch (ty) { + case Ity_F32: return Iop_MAddF32; + case 
Ity_F64: return Iop_MAddF64; + default: vpanic("mkFMADDF"); + } +} + +static IROp mkFMSUBF ( IRType ty ) { + switch (ty) { + case Ity_F32: return Iop_MSubF32; + case Ity_F64: return Iop_MSubF64; + default: vpanic("mkFMSUBF"); + } +} + static IROp mkSUBF ( IRType ty ) { switch (ty) { case Ity_F32: return Iop_SubF32; @@ -14368,30 +14390,40 @@ Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn) where Fx=Dx when sz=1, Fx=Sx when sz=0 -----SPEC------ ----IMPL---- - fmadd a + n * m a + n * m - fmsub a + (-n) * m a - n * m - fnmadd (-a) + (-n) * m -(a + n * m) - fnmsub (-a) + n * m -(a - n * m) + fmadd a + n * m fmadd (a, n, m) + fmsub a + (-n) * m fmsub (a, n, m) + fnmadd (-a) + (-n) * m fmadd (-a, -n, m) + fnmsub (-a) + n * m fmadd (-a, n, m) + + Note Iop_MAdd/SubF32/64 take arguments in the order: rm, N, M, A */ Bool isD = (ty & 1) == 1; UInt ix = (bitO1 << 1) | bitO0; IRType ity = isD ? Ity_F64 : Ity_F32; - IROp opADD = mkADDF(ity); - IROp opSUB = mkSUBF(ity); - IROp opMUL = mkMULF(ity); + IROp opFMADD = mkFMADDF(ity); + IROp opFMSUB = mkFMSUBF(ity); IROp opNEG = mkNEGF(ity); IRTemp res = newTemp(ity); IRExpr* eA = getQRegLO(aa, ity); IRExpr* eN = getQRegLO(nn, ity); IRExpr* eM = getQRegLO(mm, ity); IRExpr* rm = mkexpr(mk_get_IR_rounding_mode()); - IRExpr* eNxM = triop(opMUL, rm, eN, eM); switch (ix) { - case 0: assign(res, triop(opADD, rm, eA, eNxM)); break; - case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break; - case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break; - case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break; - default: vassert(0); + case 0: /* FMADD */ + assign(res, qop(opFMADD, rm, eN, eM, eA)); + break; + case 1: /* FMSUB */ + assign(res, qop(opFMSUB, rm, eN, eM, eA)); + break; + case 2: /* FNMADD */ + assign(res, qop(opFMADD, rm, unop(opNEG, eN), eM, + unop(opNEG,eA))); + break; + case 3: /* FNMSUB */ + assign(res, qop(opFMADD, rm, eN, eM, unop(opNEG, eA))); + break; + default: + vassert(0); } 
putQReg128(dd, mkV128(0x0000)); putQRegLO(dd, mkexpr(res)); diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index e4ef569868..13b497f600 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -546,6 +546,14 @@ static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) { } } +static const HChar* showARM64FpTriOp ( ARM64FpTriOp op ) { + switch (op) { + case ARM64fpt_FMADD: return "fmadd"; + case ARM64fpt_FMSUB: return "fmsub"; + default: vpanic("showARM64FpTriOp"); + } +} + static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) { switch (op) { case ARM64fpu_NEG: return "neg "; @@ -1154,6 +1162,28 @@ ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op, i->ARM64in.VBinS.argR = argR; return i; } +ARM64Instr* ARM64Instr_VTriD ( ARM64FpTriOp op, + HReg dst, HReg arg1, HReg arg2, HReg arg3 ) { + ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); + i->tag = ARM64in_VTriD; + i->ARM64in.VTriD.op = op; + i->ARM64in.VTriD.dst = dst; + i->ARM64in.VTriD.arg1 = arg1; + i->ARM64in.VTriD.arg2 = arg2; + i->ARM64in.VTriD.arg3 = arg3; + return i; +} +ARM64Instr* ARM64Instr_VTriS ( ARM64FpTriOp op, + HReg dst, HReg arg1, HReg arg2, HReg arg3 ) { + ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); + i->tag = ARM64in_VTriS; + i->ARM64in.VTriS.op = op; + i->ARM64in.VTriS.dst = dst; + i->ARM64in.VTriS.arg1 = arg1; + i->ARM64in.VTriS.arg2 = arg2; + i->ARM64in.VTriS.arg3 = arg3; + return i; +} ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) { ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); i->tag = ARM64in_VCmpD; @@ -1756,6 +1786,26 @@ void ppARM64Instr ( const ARM64Instr* i ) { vex_printf(", "); ppHRegARM64asSreg(i->ARM64in.VBinS.argR); return; + case ARM64in_VTriD: + vex_printf("f%s ", showARM64FpTriOp(i->ARM64in.VTriD.op)); + ppHRegARM64(i->ARM64in.VTriD.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VTriD.arg1); + vex_printf(", "); + ppHRegARM64(i->ARM64in.VTriD.arg2); + vex_printf(", "); + 
ppHRegARM64(i->ARM64in.VTriD.arg3); + return; + case ARM64in_VTriS: + vex_printf("f%s ", showARM64FpTriOp(i->ARM64in.VTriS.op)); + ppHRegARM64asSreg(i->ARM64in.VTriS.dst); + vex_printf(", "); + ppHRegARM64asSreg(i->ARM64in.VTriS.arg1); + vex_printf(", "); + ppHRegARM64asSreg(i->ARM64in.VTriS.arg2); + vex_printf(", "); + ppHRegARM64asSreg(i->ARM64in.VTriS.arg3); + return; case ARM64in_VCmpD: vex_printf("fcmp "); ppHRegARM64(i->ARM64in.VCmpD.argL); @@ -2197,6 +2247,18 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL); addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR); return; + case ARM64in_VTriD: + addHRegUse(u, HRmWrite, i->ARM64in.VTriD.dst); + addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg1); + addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg2); + addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg3); + return; + case ARM64in_VTriS: + addHRegUse(u, HRmWrite, i->ARM64in.VTriS.dst); + addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg1); + addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg2); + addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg3); + return; case ARM64in_VCmpD: addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL); addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR); @@ -2454,6 +2516,18 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 ) i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL); i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR); return; + case ARM64in_VTriD: + i->ARM64in.VTriD.dst = lookupHRegRemap(m, i->ARM64in.VTriD.dst); + i->ARM64in.VTriD.arg1 = lookupHRegRemap(m, i->ARM64in.VTriD.arg1); + i->ARM64in.VTriD.arg2 = lookupHRegRemap(m, i->ARM64in.VTriD.arg2); + i->ARM64in.VTriD.arg3 = lookupHRegRemap(m, i->ARM64in.VTriD.arg3); + return; + case ARM64in_VTriS: + i->ARM64in.VTriS.dst = lookupHRegRemap(m, i->ARM64in.VTriS.dst); + i->ARM64in.VTriS.arg1 = lookupHRegRemap(m, i->ARM64in.VTriS.arg1); + i->ARM64in.VTriS.arg2 = lookupHRegRemap(m, i->ARM64in.VTriS.arg2); + 
i->ARM64in.VTriS.arg3 = lookupHRegRemap(m, i->ARM64in.VTriS.arg3); + return; case ARM64in_VCmpD: i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL); i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR); @@ -2812,7 +2886,8 @@ static inline UInt qregEnc ( HReg r ) #define X11110011 BITS8(1,1,1,1,0,0,1,1) #define X11110101 BITS8(1,1,1,1,0,1,0,1) #define X11110111 BITS8(1,1,1,1,0,1,1,1) - +#define X11111000 BITS8(1,1,1,1,1,0,0,0) +#define X11111010 BITS8(1,1,1,1,1,0,1,0) /* --- 4 fields --- */ @@ -2972,6 +3047,27 @@ static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3, } +static inline UInt X_3_8_5_1_5_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4, + UInt f5, UInt f6, UInt f7 ) { + vassert(3+8+5+1+5+5+5 == 32); + vassert(f1 < (1<<3)); + vassert(f2 < (1<<8)); + vassert(f3 < (1<<5)); + vassert(f4 < (1<<1)); + vassert(f5 < (1<<5)); + vassert(f6 < (1<<5)); + vassert(f7 < (1<<5)); + UInt w = 0; + w = (w << 3) | f1; + w = (w << 8) | f2; + w = (w << 5) | f3; + w = (w << 1) | f4; + w = (w << 5) | f5; + w = (w << 5) | f6; + w = (w << 5) | f7; + return w; +} + //ZZ #define X0000 BITS4(0,0,0,0) //ZZ #define X0001 BITS4(0,0,0,1) //ZZ #define X0010 BITS4(0,0,1,0) @@ -4339,6 +4435,44 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD); goto done; } + case ARM64in_VTriD: { + /* 31 20 15 14 9 4 + 000 11111 010 m 0 a n d FMADD Dd,Dn,Dm,Da + ---------------- 1 ------ FMSUB ----------- + */ + UInt dD = dregEnc(i->ARM64in.VTriD.dst); + UInt dN = dregEnc(i->ARM64in.VTriD.arg1); + UInt dM = dregEnc(i->ARM64in.VTriD.arg2); + UInt dA = dregEnc(i->ARM64in.VTriD.arg3); + UInt b15 = 2; /* impossible */ + switch (i->ARM64in.VTriD.op) { + case ARM64fpt_FMADD: b15 = 0; break; + case ARM64fpt_FMSUB: b15 = 1; break; + default: goto bad; + } + vassert(b15 < 2); + *p++ = X_3_8_5_1_5_5_5(X000, X11111010, dM, b15, dA, dN, dD); + goto done; + } + case ARM64in_VTriS: { + /* 31 20 15 14 9 4 + 000 11111 000 m 0 
a n d FMADD Dd,Dn,Dm,Da + ---------------- 1 ------ FMSUB ----------- + */ + UInt dD = dregEnc(i->ARM64in.VTriD.dst); + UInt dN = dregEnc(i->ARM64in.VTriD.arg1); + UInt dM = dregEnc(i->ARM64in.VTriD.arg2); + UInt dA = dregEnc(i->ARM64in.VTriD.arg3); + UInt b15 = 2; /* impossible */ + switch (i->ARM64in.VTriD.op) { + case ARM64fpt_FMADD: b15 = 0; break; + case ARM64fpt_FMSUB: b15 = 1; break; + default: goto bad; + } + vassert(b15 < 2); + *p++ = X_3_8_5_1_5_5_5(X000, X11111000, dM, b15, dA, dN, dD); + goto done; + } case ARM64in_VCmpD: { /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */ UInt dN = dregEnc(i->ARM64in.VCmpD.argL); diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index 05dba7ab8b..5a82564ce6 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -289,6 +289,14 @@ typedef } ARM64FpBinOp; +typedef + enum { + ARM64fpt_FMADD=105, + ARM64fpt_FMSUB, + ARM64fpt_INVALID + } + ARM64FpTriOp; + typedef enum { ARM64fpu_NEG=110, @@ -498,6 +506,8 @@ typedef ARM64in_VUnaryS, ARM64in_VBinD, ARM64in_VBinS, + ARM64in_VTriD, + ARM64in_VTriS, ARM64in_VCmpD, ARM64in_VCmpS, ARM64in_VFCSel, @@ -799,6 +809,22 @@ typedef HReg argL; HReg argR; } VBinS; + /* 64-bit FP ternary arithmetic */ + struct { + ARM64FpTriOp op; + HReg dst; + HReg arg1; + HReg arg2; + HReg arg3; + } VTriD; + /* 32-bit FP ternary arithmetic */ + struct { + ARM64FpTriOp op; + HReg dst; + HReg arg1; + HReg arg2; + HReg arg3; + } VTriS; /* 64-bit FP compare */ struct { HReg argL; @@ -970,6 +996,10 @@ extern ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ); extern ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ); extern ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op, HReg, HReg, HReg ); extern ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op, HReg, HReg, HReg ); +extern ARM64Instr* ARM64Instr_VTriD ( ARM64FpTriOp op, HReg dst, + HReg, HReg, HReg ); +extern ARM64Instr* ARM64Instr_VTriS ( ARM64FpTriOp op, HReg dst, + HReg, 
HReg, HReg ); extern ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ); extern ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ); extern ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR, diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index 2f19eab814..da1218715e 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -3255,6 +3255,25 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) } } + if (e->tag == Iex_Qop) { + IRQop* qop = e->Iex.Qop.details; + ARM64FpTriOp triop = ARM64fpt_INVALID; + switch (qop->op) { + case Iop_MAddF64: triop = ARM64fpt_FMADD; break; + case Iop_MSubF64: triop = ARM64fpt_FMSUB; break; + default: break; + } + if (triop != ARM64fpt_INVALID) { + HReg N = iselDblExpr(env, qop->arg2); + HReg M = iselDblExpr(env, qop->arg3); + HReg A = iselDblExpr(env, qop->arg4); + HReg dst = newVRegD(env); + set_FPCR_rounding_mode(env, qop->arg1); + addInstr(env, ARM64Instr_VTriD(triop, dst, N, M, A)); + return dst; + } + } + if (e->tag == Iex_ITE) { /* ITE(ccexpr, iftrue, iffalse) */ ARM64CondCode cc; @@ -3450,6 +3469,26 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } + if (e->tag == Iex_Qop) { + IRQop* qop = e->Iex.Qop.details; + ARM64FpTriOp triop = ARM64fpt_INVALID; + switch (qop->op) { + case Iop_MAddF32: triop = ARM64fpt_FMADD; break; + case Iop_MSubF32: triop = ARM64fpt_FMSUB; break; + default: break; + } + + if (triop != ARM64fpt_INVALID) { + HReg N = iselFltExpr(env, qop->arg2); + HReg M = iselFltExpr(env, qop->arg3); + HReg A = iselFltExpr(env, qop->arg4); + HReg dst = newVRegD(env); + set_FPCR_rounding_mode(env, qop->arg1); + addInstr(env, ARM64Instr_VTriS(triop, dst, N, M, A)); + return dst; + } + } + ppIRExpr(e); vpanic("iselFltExpr_wrk"); } diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am index 7b3ebbdca9..4ecab36add 100644 --- a/none/tests/arm64/Makefile.am +++ b/none/tests/arm64/Makefile.am @@ -10,14 +10,16 @@ EXTRA_DIST = \ 
integer.stdout.exp integer.stderr.exp integer.vgtest \ memory.stdout.exp memory.stderr.exp memory.vgtest \ atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest \ - simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest + simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest \ + fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest check_PROGRAMS = \ allexec \ cvtf_imm \ fp_and_simd \ integer \ - memory + memory \ + fmadd_sub if BUILD_ARMV8_CRC_TESTS check_PROGRAMS += crc32 diff --git a/none/tests/arm64/fmadd_sub.c b/none/tests/arm64/fmadd_sub.c new file mode 100644 index 0000000000..dcab22d1bc --- /dev/null +++ b/none/tests/arm64/fmadd_sub.c @@ -0,0 +1,98 @@ +#include <math.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#define COUNT 5 + +static void +print_float(const char *ident, float x) +{ + union + { + float f; + uint32_t i; + } u; + + u.f = x; + printf("%s = %08x = %.17g\n", ident, u.i, x); +} + +static void +print_double(const char *ident, double x) +{ + union + { + double f; + uint64_t i; + } u; + + u.f = x; + printf("%s = %016lx = %.17g\n", ident, u.i, x); +} + +int +main(int argc, char **argv) +{ + float x[] = { 55, 0.98076171874999996, 0, 1, 0xFFFFFFFF } ; + float y[] = { 0.69314718055994529, 1.015625, 0, 1, 0xFFFFFFFF }; + float z[] = { 38.123094930796988, 1, 0, 1, 0xFFFFFFFF }; + float dst = -5; + + double dx[] = { 55, 0.98076171874999996, 0, 1, 0xFFFFFFFF } ; + double dy[] = { 0.69314718055994529, 1.015625, 0, 1, 0xFFFFFFFF }; + double dz[] = { 38.123094930796988, 1, 0, 1, 0xFFFFFFFF }; + double ddst= -5; + + int i; + + for (i = 0; i < COUNT; i++) { + //32bit variant + asm("fmadd %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i])); + printf("FMADD 32bit: dst = z + x * y\n"); + printf("%f = %f + %f * %f\n", dst, z[i], x[i], y[i]); + print_float("dst", dst); + + // Floating-point negated fused multiply-add + asm("fnmadd %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i])); + 
printf("FNMADD 32bit: dst = -z + (-x) * y\n"); + printf("%f = -%f + (-%f) * %f\n", dst, z[i], x[i], y[i]); + print_float("dst", dst); + + asm("fmsub %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i])); + printf("FMSUB 32bit: dst = z + (-x) * y\n"); + printf("%f = %f + (-%f) * %f\n", dst, z[i], x[i], y[i]); + print_float("dst", dst); + + asm("fnmsub %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i])); + printf("FNMSUB 32bit: dst = -z + x * y\n"); + printf("%f = -%f + %f * %f\n", dst, z[i], x[i], y[i]); + print_float("dst", dst); + + //64bit variant + asm("fmadd %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i])); + printf("FMADD 64bit: dst = z + x * y\n"); + printf("%f = %f + %f * %f\n", ddst, dz[i], dx[i], dy[i]); + print_double("dst", ddst); + + asm("fnmadd %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i])); + printf("FNMADD 64bit: dst = -z + (-x) * y\n"); + printf("%f = -%f - %f * %f\n", ddst, dz[i], dx[i], dy[i]); + print_double("dst", ddst); + + asm("fmsub %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i])); + printf("FMSUB 64bit: dst = z + (-x) * y\n"); + printf("%f = %f + (-%f) * %f\n", ddst, dz[i], dx[i], dy[i]); + print_double("dst", ddst); + + asm("fnmsub %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i])); + printf("FNMSUB 64bit: dst = -z + x * y\n"); + printf("%f = -%f + %f * %f\n", ddst, dz[i], dx[i], dy[i]); + print_double("dst", ddst); + + printf("\n"); + } + + return 0; +} + diff --git a/none/tests/arm64/fmadd_sub.stderr.exp b/none/tests/arm64/fmadd_sub.stderr.exp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/none/tests/arm64/fmadd_sub.stdout.exp b/none/tests/arm64/fmadd_sub.stdout.exp new file mode 100644 index 0000000000..f1824b12b0 --- /dev/null +++ b/none/tests/arm64/fmadd_sub.stdout.exp @@ -0,0 +1,125 @@ +FMADD 32bit: dst = z + x * y +76.246193 = 38.123096 + 55.000000 * 0.693147 +dst = 42987e0d = 
76.246192932128906 +FNMADD 32bit: dst = -z + (-x) * y +-76.246193 = -38.123096 + (-55.000000) * 0.693147 +dst = c2987e0d = -76.246192932128906 +FMSUB 32bit: dst = z + (-x) * y +0.000001 = 38.123096 + (-55.000000) * 0.693147 +dst = 35c00000 = 1.430511474609375e-06 +FNMSUB 32bit: dst = -z + x * y +-0.000001 = -38.123096 + 55.000000 * 0.693147 +dst = b5c00000 = -1.430511474609375e-06 +FMADD 64bit: dst = z + x * y +76.246190 = 38.123095 + 55.000000 * 0.693147 +dst = 40530fc1931f09c9 = 76.246189861593976 +FNMADD 64bit: dst = -z + (-x) * y +-76.246190 = -38.123095 - 55.000000 * 0.693147 +dst = c0530fc1931f09c9 = -76.246189861593976 +FMSUB 64bit: dst = z + (-x) * y +-0.000000 = 38.123095 + (-55.000000) * 0.693147 +dst = bce9000000000000 = -2.7755575615628914e-15 +FNMSUB 64bit: dst = -z + x * y +0.000000 = -38.123095 + 55.000000 * 0.693147 +dst = 3ce9000000000000 = 2.7755575615628914e-15 + +FMADD 32bit: dst = z + x * y +1.996086 = 1.000000 + 0.980762 * 1.015625 +dst = 3fff7fc0 = 1.9960861206054688 +FNMADD 32bit: dst = -z + (-x) * y +-1.996086 = -1.000000 + (-0.980762) * 1.015625 +dst = bfff7fc0 = -1.9960861206054688 +FMSUB 32bit: dst = z + (-x) * y +0.003914 = 1.000000 + (-0.980762) * 1.015625 +dst = 3b80401a = 0.00391389150172472 +FNMSUB 32bit: dst = -z + x * y +-0.003914 = -1.000000 + 0.980762 * 1.015625 +dst = bb80401a = -0.00391389150172472 +FMADD 64bit: dst = z + x * y +1.996086 = 1.000000 + 0.980762 * 1.015625 +dst = 3fffeff800000000 = 1.9960861206054688 +FNMADD 64bit: dst = -z + (-x) * y +-1.996086 = -1.000000 - 0.980762 * 1.015625 +dst = bfffeff800000000 = -1.9960861206054688 +FMSUB 64bit: dst = z + (-x) * y +0.003914 = 1.000000 + (-0.980762) * 1.015625 +dst = 3f70080000000034 = 0.0039138793945312951 +FNMSUB 64bit: dst = -z + x * y +-0.003914 = -1.000000 + 0.980762 * 1.015625 +dst = bf70080000000034 = -0.0039138793945312951 + +FMADD 32bit: dst = z + x * y +0.000000 = 0.000000 + 0.000000 * 0.000000 +dst = 00000000 = 0 +FNMADD 32bit: dst = -z + (-x) * y +-0.000000 = 
-0.000000 + (-0.000000) * 0.000000 +dst = 80000000 = -0 +FMSUB 32bit: dst = z + (-x) * y +0.000000 = 0.000000 + (-0.000000) * 0.000000 +dst = 00000000 = 0 +FNMSUB 32bit: dst = -z + x * y +0.000000 = -0.000000 + 0.000000 * 0.000000 +dst = 00000000 = 0 +FMADD 64bit: dst = z + x * y +0.000000 = 0.000000 + 0.000000 * 0.000000 +dst = 0000000000000000 = 0 +FNMADD 64bit: dst = -z + (-x) * y +-0.000000 = -0.000000 - 0.000000 * 0.000000 +dst = 8000000000000000 = -0 +FMSUB 64bit: dst = z + (-x) * y +0.000000 = 0.000000 + (-0.000000) * 0.000000 +dst = 0000000000000000 = 0 +FNMSUB 64bit: dst = -z + x * y +0.000000 = -0.000000 + 0.000000 * 0.000000 +dst = 0000000000000000 = 0 + +FMADD 32bit: dst = z + x * y +2.000000 = 1.000000 + 1.000000 * 1.000000 +dst = 40000000 = 2 +FNMADD 32bit: dst = -z + (-x) * y +-2.000000 = -1.000000 + (-1.000000) * 1.000000 +dst = c0000000 = -2 +FMSUB 32bit: dst = z + (-x) * y +0.000000 = 1.000000 + (-1.000000) * 1.000000 +dst = 00000000 = 0 +FNMSUB 32bit: dst = -z + x * y +0.000000 = -1.000000 + 1.000000 * 1.000000 +dst = 00000000 = 0 +FMADD 64bit: dst = z + x * y +2.000000 = 1.000000 + 1.000000 * 1.000000 +dst = 4000000000000000 = 2 +FNMADD 64bit: dst = -z + (-x) * y +-2.000000 = -1.000000 - 1.000000 * 1.000000 +dst = c000000000000000 = -2 +FMSUB 64bit: dst = z + (-x) * y +0.000000 = 1.000000 + (-1.000000) * 1.000000 +dst = 0000000000000000 = 0 +FNMSUB 64bit: dst = -z + x * y +0.000000 = -1.000000 + 1.000000 * 1.000000 +dst = 0000000000000000 = 0 + +FMADD 32bit: dst = z + x * y +18446744073709551616.000000 = 4294967296.000000 + 4294967296.000000 * 4294967296.000000 +dst = 5f800000 = 1.8446744073709552e+19 +FNMADD 32bit: dst = -z + (-x) * y +-18446744073709551616.000000 = -4294967296.000000 + (-4294967296.000000) * 4294967296.000000 +dst = df800000 = -1.8446744073709552e+19 +FMSUB 32bit: dst = z + (-x) * y +-18446744073709551616.000000 = 4294967296.000000 + (-4294967296.000000) * 4294967296.000000 +dst = df800000 = -1.8446744073709552e+19 +FNMSUB 
32bit: dst = -z + x * y +18446744073709551616.000000 = -4294967296.000000 + 4294967296.000000 * 4294967296.000000 +dst = 5f800000 = 1.8446744073709552e+19 +FMADD 64bit: dst = z + x * y +18446744069414584320.000000 = 4294967295.000000 + 4294967295.000000 * 4294967295.000000 +dst = 43efffffffe00000 = 1.8446744069414584e+19 +FNMADD 64bit: dst = -z + (-x) * y +-18446744069414584320.000000 = -4294967295.000000 - 4294967295.000000 * 4294967295.000000 +dst = c3efffffffe00000 = -1.8446744069414584e+19 +FMSUB 64bit: dst = z + (-x) * y +-18446744060824649728.000000 = 4294967295.000000 + (-4294967295.000000) * 4294967295.000000 +dst = c3efffffffa00000 = -1.844674406082465e+19 +FNMSUB 64bit: dst = -z + x * y +18446744060824649728.000000 = -4294967295.000000 + 4294967295.000000 * 4294967295.000000 +dst = 43efffffffa00000 = 1.844674406082465e+19 + diff --git a/none/tests/arm64/fmadd_sub.vgtest b/none/tests/arm64/fmadd_sub.vgtest new file mode 100644 index 0000000000..b4c53eea4e --- /dev/null +++ b/none/tests/arm64/fmadd_sub.vgtest @@ -0,0 +1,3 @@ +prog: fmadd_sub +prereq: test -x fmadd_sub +vgopts: -q |
|
From: Mark W. <ma...@so...> - 2020-12-15 10:58:39
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=ab257bc49a6c8beefa794470446f917ec441f718 commit ab257bc49a6c8beefa794470446f917ec441f718 Author: Mark Wielaard <ma...@kl...> Date: Tue Dec 15 11:49:58 2020 +0100 ppc stxsibx and stxsihx instructions write too much data stxsibx (Store VSX Scalar as Integer Byte Indexed X-form) is implemented by first reading a whole word, merging in the new byte, and then writing out the whole word. Causing memcheck to warn when the destination might have room for less than 8 bytes. The stxsihx (Store VSX Scalar as Integer Halfword Indexed X-form) instruction does something similar reading and then writing a full word instead of a half word. The code can be simplified (and made more correct) by storing the byte (or half-word) directly, IRStmt_Store seems fine to store byte or half word sized data, and so seems the ppc backend. https://bugs.kde.org/show_bug.cgi?id=430354 Diff: --- NEWS | 7 ++++--- VEX/priv/guest_ppc_toIR.c | 36 ++++++++---------------------------- 2 files changed, 12 insertions(+), 31 deletions(-) diff --git a/NEWS b/NEWS index 7217273b28..45ee61d5ad 100644 --- a/NEWS +++ b/NEWS @@ -55,6 +55,9 @@ where XXXXXX is the bug number as listed below. 369029 handle linux syscalls sched_getattr and sched_setattr n-i-bz helgrind: If hg_cli__realloc fails, return NULL. +384729 __libc_freeres inhibits cross-platform valgrind +408663 Suppression file for musl libc +404076 s390x: z14 vector instructions not implemented 415293 Incorrect call-graph tracking due to new _dl_runtime_resolve_xsave* 422174 unhandled instruction bytes: 0x48 0xE9 (REX prefixed JMP instruction) 422623 epoll_ctl warns for uninitialized padding on non-amd64 64bit arches @@ -69,15 +72,13 @@ n-i-bz helgrind: If hg_cli__realloc fails, return NULL. 
428648 s390_emit_load_mem panics due to 20-bit offset for vector load 427400 PPC ISA 3.1 support is missing, part 4 427401 PPC ISA 3.1 support is missing, part 5 -384729 __libc_freeres inhibits cross-platform valgrind 427870 lmw, lswi and related PowerPC insns aren't allowed on ppc64le 427404 PPC ISA 3.1 support is missing, part 6 429692 unhandled ppc64le-linux syscall: 147 (getsid) 428909 helgrind: need to intercept duplicate libc definitions for Fedora 33 429864 s390x: C++ atomic test_and_set yields false-positive memcheck diagnostics -408663 Suppression file for musl libc -404076 s390x: z14 vector instructions not implemented +430354 ppc stxsibx and stxsihx instructions write too much data Release 3.16.1 (?? June 2020) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index c9c058a7ab..e7b576fa24 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -25671,49 +25671,29 @@ dis_vx_store ( UInt prefix, UInt theInstr ) case 0x38D: // stxsibx { - IRExpr *stored_word; - IRTemp byte_to_store = newTemp( Ity_I64 ); + IRTemp byte_to_store = newTemp( Ity_I8 ); DIP("stxsibx %u,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr); - /* Can't store just a byte, need to fetch the word at EA merge data - * and store. - */ - stored_word = load( Ity_I64, mkexpr( EA ) ); - assign( byte_to_store, binop( Iop_And64, + assign( byte_to_store, unop( Iop_64to8, unop( Iop_V128HIto64, - mkexpr( vS ) ), - mkU64( 0xFF ) ) ); + mkexpr( vS ) ) ) ); - store( mkexpr( EA ), binop( Iop_Or64, - binop( Iop_And64, - stored_word, - mkU64( 0xFFFFFFFFFFFFFF00 ) ), - mkexpr( byte_to_store ) ) ); + store( mkexpr( EA ), mkexpr( byte_to_store ) ); break; } case 0x3AD: // stxsihx { - IRExpr *stored_word; - IRTemp byte_to_store = newTemp( Ity_I64 ); + IRTemp hword_to_store = newTemp( Ity_I16 ); DIP("stxsihx %u,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr); - /* Can't store just a halfword, need to fetch the word at EA merge data - * and store. 
- */ - stored_word = load( Ity_I64, mkexpr( EA ) ); - assign( byte_to_store, binop( Iop_And64, + assign( hword_to_store, unop( Iop_64to16, unop( Iop_V128HIto64, - mkexpr( vS ) ), - mkU64( 0xFFFF ) ) ); + mkexpr( vS ) ) ) ); - store( mkexpr( EA ), binop( Iop_Or64, - binop( Iop_And64, - stored_word, - mkU64( 0xFFFFFFFFFFFF0000 ) ), - mkexpr( byte_to_store ) ) ); + store( mkexpr( EA ), mkexpr( hword_to_store ) ); break; } |