You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
1
|
2
|
3
|
4
|
5
(1) |
6
(4) |
|
7
(6) |
8
(1) |
9
(3) |
10
|
11
(6) |
12
|
13
(1) |
|
14
|
15
(1) |
16
(2) |
17
(3) |
18
|
19
(1) |
20
|
|
21
|
22
(1) |
23
|
24
|
25
|
26
(14) |
27
(2) |
|
28
|
29
(2) |
30
|
31
|
|
|
|
|
From: Andreas A. <ar...@so...> - 2023-05-11 16:06:07
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=96a830df537e2ebd02f0b96603c986351ac5d6f8 commit 96a830df537e2ebd02f0b96603c986351ac5d6f8 Author: Andreas Arnez <ar...@li...> Date: Thu Jan 26 17:41:18 2023 +0100 s390x: XC instruction: clear in 8-byte increments if possible The XC instruction is frequently executed in many programs, mainly for clearing memory. It can target from 1 to 256 bytes. If the size is constant and XC is actually used for clearing memory, Valgrind implements it as a byte-wise loop and rolls out the loop for <= 8 bytes. Instead of clearing byte-wise, it is more efficient to clear in 64-bit increments, so do this for sizes >= 8 bytes. Roll out the loop for up to 32 bytes. Overall, this reduces the number of insns by a few percent and provides a slight performance improvement for some programs. Diff: --- VEX/priv/guest_s390_toIR.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 39356e088f..11dda41ef5 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -13615,36 +13615,36 @@ s390_irgen_XC(UChar length, IRTemp start1, IRTemp start2) static void s390_irgen_XC_sameloc(UChar length, UChar b, UShort d) { - IRTemp counter = newTemp(Ity_I32); IRTemp start = newTemp(Ity_I64); - IRTemp addr = newTemp(Ity_I64); - assign(start, binop(Iop_Add64, mkU64(d), b != 0 ? 
get_gpr_dw0(b) : mkU64(0))); - if (length < 8) { - UInt i; - - for (i = 0; i <= length; ++i) { + if (length < 7) { + for (UInt i = 0; i <= length; ++i) { store(binop(Iop_Add64, mkexpr(start), mkU64(i)), mkU8(0)); } } else { - assign(counter, get_counter_w0()); - - assign(addr, binop(Iop_Add64, mkexpr(start), - unop(Iop_32Uto64, mkexpr(counter)))); - - store(mkexpr(addr), mkU8(0)); - - /* Check for end of field */ - put_counter_w0(binop(Iop_Add32, mkexpr(counter), mkU32(1))); - iterate_if(binop(Iop_CmpNE32, mkexpr(counter), mkU32(length))); - - /* Reset counter */ - put_counter_dw0(mkU64(0)); + if (length < 32) { + for (UInt i = 0; i <= length - 7; i += 8) { + store(binop(Iop_Add64, mkexpr(start), mkU64(i)), mkU64(0)); + } + } else { + IRTemp counter = newTemp(Ity_I64); + assign(counter, get_counter_dw0()); + store(binop(Iop_Add64, mkexpr(start), mkexpr(counter)), mkU64(0)); + put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(8))); + iterate_if(binop(Iop_CmpLE64U, mkexpr(counter), mkU64(length - 15))); + + /* Reset counter */ + put_counter_dw0(mkU64(0)); + } + /* Clear the remaining bytes with backward overlap */ + if ((length + 1) % 8 != 0) { + store(binop(Iop_Add64, mkexpr(start), mkU64(length - 7)), mkU64(0)); + } } - s390_cc_thunk_put1(S390_CC_OP_BITWISE, mktemp(Ity_I32, mkU32(0)), False); + s390_cc_set_val(0); if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) s390_disasm(ENC3(MNM, UDLB, UDXB), "xc", d, length, b, d, 0, b); |
|
From: Andreas A. <ar...@so...> - 2023-05-11 16:06:02
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=97d335621a60a3c29ded56ea4a29ae1968ed7c8a commit 97d335621a60a3c29ded56ea4a29ae1968ed7c8a Author: Andreas Arnez <ar...@li...> Date: Fri May 5 17:48:31 2023 +0200 s390x: Optimize CLC for 1, 2, 4, and 8 bytes The CLC instruction compares two memory areas with sizes from 1 up to 256 bytes. Currently Valgrind always implements it with a bytewise loop. Add special handling for the sizes 1, 2, 4, and 8. Realize CLC with an 8-, 16-, 32-, and 64-bit integer comparison, respectively, in those cases. Apart from a slight optimization this also improves the diagnostics for uninitialized values since it avoids the manufactured conditional jump that breaks out of the loop over the individual bytes. Diff: --- VEX/priv/guest_s390_toIR.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c index 250daeca13..39356e088f 100644 --- a/VEX/priv/guest_s390_toIR.c +++ b/VEX/priv/guest_s390_toIR.c @@ -12849,11 +12849,28 @@ s390_irgen_TDGXT(UChar r1, IRTemp op2addr) static const HChar * s390_irgen_CLC(UChar length, IRTemp start1, IRTemp start2) { - IRTemp len = newTemp(Ity_I64); - - assign(len, mkU64(length)); - s390_irgen_CLC_EX(len, start1, start2); + IRType ty; + + switch (length) { + case 0: ty = Ity_I8; break; + case 1: ty = Ity_I16; break; + case 3: ty = Ity_I32; break; + case 7: ty = Ity_I64; break; + default: ty = Ity_INVALID; + } + if (ty != Ity_INVALID) { + IRTemp a = newTemp(ty); + IRTemp b = newTemp(ty); + + assign(a, load(ty, mkexpr(start1))); + assign(b, load(ty, mkexpr(start2))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, a, b); + } else { + IRTemp len = newTemp(Ity_I64); + assign(len, mkU64(length)); + s390_irgen_CLC_EX(len, start1, start2); + } return "clc"; } |
|
From: Andreas A. <ar...@so...> - 2023-05-11 16:05:57
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=afa21cd6a259c0b16fdfd78eb84d1ed221cf8268 commit afa21cd6a259c0b16fdfd78eb84d1ed221cf8268 Author: Andreas Arnez <ar...@li...> Date: Tue Jan 24 16:11:12 2023 +0100 s390x: Update information in README.s390 Refresh some of the information in README.s390: * update links to Principles of Operation and Reference Summary * no longer recommend compiling with '-fno-builtin', since there are no known false positives due to builtin string operations anymore * don't mention callgrind as being unsupported, since there are no known issues with it Diff: --- README.s390 | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/README.s390 b/README.s390 index a146eba259..86f867188a 100644 --- a/README.s390 +++ b/README.s390 @@ -13,14 +13,13 @@ Limitations - Hexadecimal floating point is not supported. - Transactional memory is not supported. The transactional-execution facility is masked off from HWCAP. +- A full list of unimplemented instructions can be retrieved from + `docs/internals/s390-opcodes.csv', by grepping for "not implemented". - FP signalling is not accurate. E.g., the "compare and signal" instructions behave like their non-signalling counterparts. -- memcheck, cachegrind, drd, helgrind, massif, lackey, and none are - supported. - On machine models predating z10, cachegrind will assume a z10 cache architecture. Otherwise, cachegrind will query the hosts cache system and use those parameters. -- callgrind and all experimental tools are currently not supported. - Some gcc versions use mvc to copy 4/8 byte values. This will affect certain debug messages. For example, memcheck will complain about 4 one-byte reads/writes instead of just a single read/write. @@ -38,17 +37,11 @@ may not work. As a consequence, programs that attempt to do so may behave differently. It is believed that this is a rare use case. 
-Recommendations ---------------- -Applications should be compiled with -fno-builtin to avoid -false positives due to builtin string operations when running memcheck. - - Reading Material ---------------- (1) ELF ABI s390x Supplement https://github.com/IBM/s390x-abi/releases (2) z/Architecture Principles of Operation - http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf + https://www.ibm.com/support/pages/zarchitecture-principles-operation (3) z/Architecture Reference Summary - https://www.ibm.com/support/pages/system/files/2020-07/SA22-7871-10.pdf + https://www.ibm.com/support/pages/zarchitecture-reference-summary |
|
From: Andreas A. <ar...@so...> - 2023-05-11 10:52:07
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=053643239ed39e4f6137e78dfd3fd8e259f7827c commit 053643239ed39e4f6137e78dfd3fd8e259f7827c Author: Andreas Arnez <ar...@li...> Date: Thu Sep 1 15:03:01 2022 +0200 Bug 465782 - s390x: Drop -mzarch -march=z900 from assembler options The -mzarch flag is unsupported by Clang, and it is redundant on 64-bit build systems. Remove it. Also remove '-march=z900', since it is unsupported by Clang as well. It would only be needed on build systems with a default architecture lower than z900. Such systems have been out of service for some time now. Diff: --- Makefile.all.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.all.am b/Makefile.all.am index 1de1f13a7e..94d65f2ccb 100755 --- a/Makefile.all.am +++ b/Makefile.all.am @@ -270,7 +270,7 @@ AM_CCASFLAGS_AMD64_DARWIN = -arch x86_64 -g AM_FLAG_M3264_S390X_LINUX = @FLAG_M64@ AM_CFLAGS_S390X_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) -fomit-frame-pointer AM_CFLAGS_PSO_S390X_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) $(AM_CFLAGS_PSO_BASE) -AM_CCASFLAGS_S390X_LINUX = @FLAG_M64@ -g -mzarch -march=z900 +AM_CCASFLAGS_S390X_LINUX = @FLAG_M64@ -g AM_FLAG_M3264_MIPS32_LINUX = @FLAG_M32@ AM_CFLAGS_MIPS32_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE) |
|
From: Andreas A. <ar...@so...> - 2023-05-11 10:52:06
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=165c3769580b7f256aedb0b5d48972fa28b30094 commit 165c3769580b7f256aedb0b5d48972fa28b30094 Author: Andreas Arnez <ar...@li...> Date: Tue Feb 14 15:35:48 2023 +0100 Bug 465782 - s390x: Adjust inline assemblies for Clang Some s390x inline assemblies are written in such a way that they are understood by GCC but not by Clang: * use of "f" constraint for unsigned long variable * use of "b" modifier when passing a constant * use of the register notation "r11" in ".cfi_def_cfa" Adjust the affected inline assemblies to avoid these constructs. Diff: --- VEX/priv/guest_s390_helpers.c | 246 ++++++++++++++++++++++-------------------- coregrind/m_debuglog.c | 4 +- include/valgrind.h.in | 2 +- 3 files changed, 134 insertions(+), 118 deletions(-) diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c index 804b92a296..95bb653274 100644 --- a/VEX/priv/guest_s390_helpers.c +++ b/VEX/priv/guest_s390_helpers.c @@ -913,7 +913,7 @@ s390_do_cvb(ULong decimal) __asm__ volatile ( "cvb %[result],%[input]\n\t" : [result] "=d"(binary) - : [input] "m"(decimal) + : [input] "R"(decimal) ); return binary; @@ -1000,8 +1000,12 @@ UInt s390_do_pfpo(UInt gpr0) { return 0; } /*--- Helper for condition code. ---*/ /*------------------------------------------------------------*/ -/* Convert an IRRoundingMode value to s390_bfp_round_t */ #if defined(VGA_s390x) +typedef long double Float128; +union s390x_F64 { ULong i; Double f; }; +union s390x_F128 { struct { ULong hi, lo; } i; Float128 f; }; + +/* Convert an IRRoundingMode value to s390_bfp_round_t */ static s390_bfp_round_t decode_bfp_rounding_mode(UInt irrm) { @@ -1013,8 +1017,6 @@ decode_bfp_rounding_mode(UInt irrm) } vpanic("decode_bfp_rounding_mode"); } -#endif - #define S390_CC_FOR_BINARY(opcode,cc_dep1,cc_dep2) \ ({ \ @@ -1041,13 +1043,14 @@ decode_bfp_rounding_mode(UInt irrm) /* Recover the original DEP2 value. See comment near s390_cc_thunk_put3 \ for rationale. 
*/ \ cc_dep2 = cc_dep2 ^ cc_ndep; \ + ULong tmp = 1; \ __asm__ volatile ( \ - "lghi 0,1\n\t" \ - "sr 0,%[op3]\n\t" /* borrow to cc */ \ + "sr %[tmp],%[op3]\n\t" /* borrow to cc */ \ opcode " %[op1],%[op2]\n\t" /* then redo the op */\ - "ipm %[psw]\n\t" : [psw] "=d"(psw), [op1] "+&d"(cc_dep1) \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [op1] "+&d"(cc_dep1), \ + [tmp] "+&d"(tmp) \ : [op2] "d"(cc_dep2), [op3] "d"(cc_ndep) \ - : "0", "cc");\ + : "cc");\ psw >> 28; /* cc */ \ }) @@ -1056,46 +1059,52 @@ decode_bfp_rounding_mode(UInt irrm) /* Recover the original DEP2 value. See comment near s390_cc_thunk_put3 \ for rationale. */ \ cc_dep2 = cc_dep2 ^ cc_ndep; \ + ULong tmp; \ __asm__ volatile ( \ - "lgfr 0,%[op3]\n\t" /* first load cc_ndep */ \ - "aghi 0,0\n\t" /* and convert it into a cc */ \ + "lgfr %[tmp],%[op3]\n\t" /* first load cc_ndep */ \ + "aghi %[tmp],0\n\t" /* and convert it into a cc */ \ opcode " %[op1],%[op2]\n\t" /* then redo the op */\ - "ipm %[psw]\n\t" : [psw] "=d"(psw), [op1] "+&d"(cc_dep1) \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [op1] "+&d"(cc_dep1), \ + [tmp] "=&d"(tmp) \ : [op2] "d"(cc_dep2), [op3] "d"(cc_ndep) \ - : "0", "cc");\ + : "cc");\ psw >> 28; /* cc */ \ }) #define S390_CC_FOR_BFP_RESULT(opcode,cc_dep1) \ ({ \ + union s390x_F64 op = { .i = cc_dep1 }; \ + Double tmp; \ __asm__ volatile ( \ - opcode " 0,%[op]\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [op] "f"(cc_dep1) \ - : "cc", "f0");\ + opcode " %[tmp],%[op]\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=f"(tmp) \ + : [op] "f"(op.f) \ + : "cc");\ psw >> 28; /* cc */ \ }) #define S390_CC_FOR_BFP128_RESULT(hi,lo) \ ({ \ + union s390x_F128 op = { .i = { hi, lo } }; \ + Float128 tmp; \ __asm__ volatile ( \ - "ldr 4,%[high]\n\t" \ - "ldr 6,%[low]\n\t" \ - "ltxbr 0,4\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [high] "f"(hi), [low] "f"(lo) \ - : "cc", "f0", "f2", "f4", "f6");\ + "ltxbr %[tmp],%[op]\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=f"(tmp) \ + : [op] "f"(op.f) 
\ + : "cc");\ psw >> 28; /* cc */ \ }) #define S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,rounding_mode) \ ({ \ + union s390x_F64 op = { .i = cc_dep1 }; \ + ULong tmp; \ __asm__ volatile ( \ - opcode " 0," #rounding_mode ",%[op]\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [op] "f"(cc_dep1) \ - : "cc", "r0");\ + opcode " %[tmp]," #rounding_mode ",%[op]\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=d"(tmp) \ + : [op] "f"(op.f) \ + : "cc");\ psw >> 28; /* cc */ \ }) @@ -1123,11 +1132,13 @@ decode_bfp_rounding_mode(UInt irrm) #define S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,rounding_mode) \ ({ \ + union s390x_F64 op = { .i = cc_dep1 }; \ + ULong tmp; \ __asm__ volatile ( \ - opcode ",0,%[op]," #rounding_mode ",0\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [op] "f"(cc_dep1) \ - : "cc", "r0");\ + opcode ",%[tmp],%[op]," #rounding_mode ",0\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=d"(tmp) \ + : [op] "f"(op.f) \ + : "cc");\ psw >> 28; /* cc */ \ }) @@ -1155,13 +1166,13 @@ decode_bfp_rounding_mode(UInt irrm) #define S390_CC_FOR_BFP128_CONVERT_AUX(opcode,hi,lo,rounding_mode) \ ({ \ + union s390x_F128 op = { .i = { hi, lo } }; \ + ULong tmp; \ __asm__ volatile ( \ - "ldr 4,%[high]\n\t" \ - "ldr 6,%[low]\n\t" \ - opcode " 0," #rounding_mode ",4\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [high] "f"(hi), [low] "f"(lo) \ - : "cc", "r0", "f4", "f6");\ + opcode " %[tmp]," #rounding_mode ",%[op]\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=d"(tmp) \ + : [op] "f"(op.f) \ + : "cc");\ psw >> 28; /* cc */ \ }) @@ -1192,13 +1203,13 @@ decode_bfp_rounding_mode(UInt irrm) #define S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,hi,lo,rounding_mode) \ ({ \ + union s390x_F128 op = { .i = { hi, lo } }; \ + ULong tmp; \ __asm__ volatile ( \ - "ldr 4,%[high]\n\t" \ - "ldr 6,%[low]\n\t" \ - opcode ",0,4," #rounding_mode ",0\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [high] "f"(hi), [low] "f"(lo) \ - : "cc", "r0", "f4", "f6");\ + opcode 
",%[tmp],%[op]," #rounding_mode ",0\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=d"(tmp) \ + : [op] "f"(op.f) \ + : "cc");\ psw >> 28; /* cc */ \ }) @@ -1229,10 +1240,11 @@ decode_bfp_rounding_mode(UInt irrm) #define S390_CC_FOR_BFP_TDC(opcode,cc_dep1,cc_dep2) \ ({ \ + union s390x_F64 val = { .i = cc_dep1 }; \ __asm__ volatile ( \ opcode " %[value],0(%[class])\n\t" \ "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [value] "f"(cc_dep1), \ + : [value] "f"(val.f), \ [class] "a"(cc_dep2) \ : "cc");\ psw >> 28; /* cc */ \ @@ -1243,19 +1255,16 @@ decode_bfp_rounding_mode(UInt irrm) /* Recover the original DEP2 value. See comment near \ s390_cc_thunk_put1f128Z for rationale. */ \ cc_dep2 = cc_dep2 ^ cc_ndep; \ + union s390x_F128 val = { .i = { cc_dep1, cc_dep2 } }; \ __asm__ volatile ( \ - "ldr 4,%[high]\n\t" \ - "ldr 6,%[low]\n\t" \ - "tcxb 4,0(%[class])\n\t" \ + "tcxb %[value],0(%[class])\n\t" \ "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [high] "f"(cc_dep1), [low] "f"(cc_dep2), \ - [class] "a"(cc_ndep) \ - : "cc", "f4", "f6");\ + : [value] "f"(val.f), [class] "a"(cc_ndep) \ + : "cc");\ psw >> 28; /* cc */ \ }) /* Convert an IRRoundingMode value to s390_dfp_round_t */ -#if defined(VGA_s390x) static s390_dfp_round_t decode_dfp_rounding_mode(UInt irrm) { @@ -1279,36 +1288,38 @@ decode_dfp_rounding_mode(UInt irrm) } vpanic("decode_dfp_rounding_mode"); } -#endif #define S390_CC_FOR_DFP_RESULT(cc_dep1) \ ({ \ + union s390x_F64 op = { .i = cc_dep1 }; \ + Double tmp; \ __asm__ volatile ( \ - ".insn rre, 0xb3d60000,0,%[op]\n\t" /* LTDTR */ \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [op] "f"(cc_dep1) \ - : "cc", "f0"); \ + ".insn rre, 0xb3d60000,%[tmp],%[op]\n\t" /* LTDTR */ \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=f"(tmp) \ + : [op] "f"(op.f) \ + : "cc");\ psw >> 28; /* cc */ \ }) #define S390_CC_FOR_DFP128_RESULT(hi,lo) \ ({ \ + union s390x_F128 op = { .i = { hi, lo } }; \ + Float128 tmp; \ __asm__ volatile ( \ - "ldr 4,%[high]\n\t" \ - "ldr 6,%[low]\n\t" \ - ".insn 
rre, 0xb3de0000,0,4\n\t" /* LTXTR */ \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [high] "f"(hi), [low] "f"(lo) \ - : "cc", "f0", "f2", "f4", "f6"); \ + ".insn rre, 0xb3de0000,%[tmp],%[op]\n\t" /* LTXTR */ \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=f"(tmp) \ + : [op] "f"(op.f) \ + : "cc"); \ psw >> 28; /* cc */ \ }) #define S390_CC_FOR_DFP_TD(opcode,cc_dep1,cc_dep2) \ ({ \ + union s390x_F64 val = { .i = cc_dep1 }; \ __asm__ volatile ( \ opcode ",%[value],0(%[class])\n\t" \ "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [value] "f"(cc_dep1), \ + : [value] "f"(val.f), \ [class] "a"(cc_dep2) \ : "cc"); \ psw >> 28; /* cc */ \ @@ -1319,24 +1330,25 @@ decode_dfp_rounding_mode(UInt irrm) /* Recover the original DEP2 value. See comment near \ s390_cc_thunk_put1d128Z for rationale. */ \ cc_dep2 = cc_dep2 ^ cc_ndep; \ + union s390x_F128 val = { .i = { cc_dep1, cc_dep2 } }; \ __asm__ volatile ( \ - "ldr 4,%[high]\n\t" \ - "ldr 6,%[low]\n\t" \ - opcode ",4,0(%[class])\n\t" \ + opcode ",%[value],0(%[class])\n\t" \ "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [high] "f"(cc_dep1), [low] "f"(cc_dep2), \ + : [value] "f"(val.f), \ [class] "a"(cc_ndep) \ - : "cc", "f4", "f6"); \ + : "cc"); \ psw >> 28; /* cc */ \ }) #define S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,rounding_mode) \ ({ \ + union s390x_F64 op = { .i = cc_dep1 }; \ + Double tmp; \ __asm__ volatile ( \ - opcode ",0,%[op]," #rounding_mode ",0\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [op] "f"(cc_dep1) \ - : "cc", "r0"); \ + opcode ",%[tmp],%[op]," #rounding_mode ",0\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=f"(tmp) \ + : [op] "f"(op.f) \ + : "cc"); \ psw >> 28; /* cc */ \ }) @@ -1382,11 +1394,13 @@ decode_dfp_rounding_mode(UInt irrm) #define S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,rounding_mode) \ ({ \ + union s390x_F64 op = { .i = cc_dep1 }; \ + Double tmp; \ __asm__ volatile ( \ - opcode ",0,%[op]," #rounding_mode ",0\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [op] "f"(cc_dep1) \ - : "cc", 
"r0"); \ + opcode ",%[tmp],%[op]," #rounding_mode ",0\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=f"(tmp) \ + : [op] "f"(op.f) \ + : "cc"); \ psw >> 28; /* cc */ \ }) @@ -1432,13 +1446,13 @@ decode_dfp_rounding_mode(UInt irrm) #define S390_CC_FOR_DFP128_CONVERT_AUX(opcode,hi,lo,rounding_mode) \ ({ \ + union s390x_F128 op = { .i = { hi, lo } }; \ + Double tmp; \ __asm__ volatile ( \ - "ldr 4,%[high]\n\t" \ - "ldr 6,%[low]\n\t" \ - opcode ",0,4," #rounding_mode ",0\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [high] "f"(hi), [low] "f"(lo) \ - : "cc", "r0", "f4", "f6"); \ + opcode ",%[tmp],%[op]," #rounding_mode ",0\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=f"(tmp) \ + : [op] "f"(op.f) \ + : "cc"); \ psw >> 28; /* cc */ \ }) @@ -1485,16 +1499,16 @@ decode_dfp_rounding_mode(UInt irrm) cc; \ }) -#define S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,hi,lo,rounding_mode) \ - ({ \ - __asm__ volatile ( \ - "ldr 4,%[high]\n\t" \ - "ldr 6,%[low]\n\t" \ - opcode ",0,4," #rounding_mode ",0\n\t" \ - "ipm %[psw]\n\t" : [psw] "=d"(psw) \ - : [high] "f"(hi), [low] "f"(lo) \ - : "cc", "r0", "f4", "f6"); \ - psw >> 28; /* cc */ \ +#define S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,hi,lo,rounding_mode) \ + ({ \ + union s390x_F128 op = { .i = { hi, lo } }; \ + Double tmp; \ + __asm__ volatile ( \ + opcode ",%[tmp],%[op]," #rounding_mode ",0\n\t" \ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [tmp] "=f"(tmp) \ + : [op] "f"(op.f) \ + : "cc"); \ + psw >> 28; /* cc */ \ }) #define S390_CC_FOR_DFP128_UCONVERT(opcode,cc_dep1,cc_dep2,cc_ndep) \ @@ -1539,6 +1553,7 @@ decode_dfp_rounding_mode(UInt irrm) } \ cc; \ }) +#endif /* VGA_s390x */ /* Return the value of the condition code from the supplied thunk parameters. 
@@ -1620,14 +1635,15 @@ s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2, ULong cc_ndep) case S390_CC_OP_TEST_UNDER_MASK_8: { UChar value = cc_dep1; UChar mask = cc_dep2; + ULong pc; __asm__ volatile ( - "bras %%r2,1f\n\t" /* %r2 = address of next insn */ + "bras %[pc],1f\n\t" /* pc = address of next insn */ "tm %[value],0\n\t" /* this is skipped, then EXecuted */ - "1: ex %[mask],0(%%r2)\n\t" /* EXecute TM after modifying mask */ - "ipm %[psw]\n\t" : [psw] "=d"(psw) - : [value] "m"(value), [mask] "a"(mask) - : "r2", "cc"); + "1: ex %[mask],0(%[pc])\n\t" /* EXecute TM after modifying mask */ + "ipm %[psw]\n\t" : [psw] "=d"(psw), [pc] "=&a"(pc) + : [value] "Q"(value), [mask] "a"(mask) + : "cc"); return psw >> 28; /* cc */ } @@ -1641,7 +1657,7 @@ s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2, ULong cc_ndep) "lhi 2,0x10\n\t" "ex 2,%[insn]\n\t" "ipm %[psw]\n\t" : [psw] "=d"(psw) - : [value] "d"(value), [insn] "m"(insn) + : [value] "d"(value), [insn] "R"(insn) : "r1", "r2", "cc"); return psw >> 28; /* cc */ } @@ -1812,11 +1828,11 @@ s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2, ULong cc_ndep) case S390_CC_OP_PFPO_32: { __asm__ volatile( - "ler 4, %[cc_dep1]\n\t" /* 32 bit FR move */ - "lr 0, %[cc_dep2]\n\t" /* 32 bit GR move */ - ".short 0x010a\n\t" /* PFPO */ - "ipm %[psw]\n\t" : [psw] "=d"(psw) - : [cc_dep1] "f"(cc_dep1), + "ldgr 4, %[cc_dep1]\n\t" /* Load FR from GR */ + "lr 0, %[cc_dep2]\n\t" /* 32 bit GR move */ + ".insn e,0x010a\n\t" /* PFPO */ + "ipm %[psw]\n\t" : [psw] "=d"(psw) + : [cc_dep1] "d"(cc_dep1), [cc_dep2] "d"(cc_dep2) : "r0", "r1", "f4"); return psw >> 28; /* cc */ @@ -1824,11 +1840,11 @@ s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2, ULong cc_ndep) case S390_CC_OP_PFPO_64: { __asm__ volatile( - "ldr 4, %[cc_dep1]\n\t" - "lr 0, %[cc_dep2]\n\t" /* 32 bit register move */ - ".short 0x010a\n\t" /* PFPO */ - "ipm %[psw]\n\t" : [psw] "=d"(psw) - : [cc_dep1] "f"(cc_dep1), + "ldgr 4, %[cc_dep1]\n\t" + "lr 
0, %[cc_dep2]\n\t" /* 32 bit register move */ + ".insn e,0x010a\n\t" /* PFPO */ + "ipm %[psw]\n\t" : [psw] "=d"(psw) + : [cc_dep1] "d"(cc_dep1), [cc_dep2] "d"(cc_dep2) : "r0", "r1", "f4"); return psw >> 28; /* cc */ @@ -1836,13 +1852,13 @@ s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2, ULong cc_ndep) case S390_CC_OP_PFPO_128: { __asm__ volatile( - "ldr 4,%[cc_dep1]\n\t" - "ldr 6,%[cc_dep2]\n\t" - "lr 0,%[cc_ndep]\n\t" /* 32 bit register move */ - ".short 0x010a\n\t" /* PFPO */ - "ipm %[psw]\n\t" : [psw] "=d"(psw) - : [cc_dep1] "f"(cc_dep1), - [cc_dep2] "f"(cc_dep2), + "ldgr 4,%[cc_dep1]\n\t" + "ldgr 6,%[cc_dep2]\n\t" + "lr 0,%[cc_ndep]\n\t" /* 32 bit register move */ + ".insn e,0x010a\n\t" /* PFPO */ + "ipm %[psw]\n\t" : [psw] "=d"(psw) + : [cc_dep1] "d"(cc_dep1), + [cc_dep2] "d"(cc_dep2), [cc_ndep] "d"(cc_ndep) : "r0", "r1", "f0", "f2", "f4", "f6"); return psw >> 28; /* cc */ @@ -2689,7 +2705,7 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, [arg3] "r" (&guest_v[d->v4]), [zero] "r" (0ULL), - [insn] "m" (the_insn), + [insn] "R" (the_insn), [read_only] "r" (d->read_only) : "cc", "r10", "v16", "v17", "v18", "v19" diff --git a/coregrind/m_debuglog.c b/coregrind/m_debuglog.c index 355c3caf5b..fd6db4beb4 100644 --- a/coregrind/m_debuglog.c +++ b/coregrind/m_debuglog.c @@ -404,7 +404,7 @@ static UInt local_sys_write_stderr ( const HChar* buf, Int n ) ULong __res; __asm__ __volatile__ ( - "svc %b1\n" + "svc %c1\n" : "=d" (r2_res) : "i" (__NR_write), "0" (r2), @@ -424,7 +424,7 @@ static UInt local_sys_getpid ( void ) ULong __res; __asm__ __volatile__ ( - "svc %b1\n" + "svc %c1\n" : "=d" (r2) : "i" (__NR_getpid) : "cc", "memory"); diff --git a/include/valgrind.h.in b/include/valgrind.h.in index aa0b431256..45f6522f34 100644 --- a/include/valgrind.h.in +++ b/include/valgrind.h.in @@ -4748,7 +4748,7 @@ typedef "lgr 1,%1\n\t" /* copy the argvec pointer in r1 */ \ "lgr 7,11\n\t" \ "lgr 11,%2\n\t" \ - ".cfi_def_cfa r11, 0\n\t" + ".cfi_def_cfa 11, 
0\n\t" # define VALGRIND_CFI_EPILOGUE \ "lgr 11, 7\n\t" \ ".cfi_restore_state\n\t" |
|
From: Andreas A. <ar...@so...> - 2023-05-11 10:52:06
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=d3ee8178e1d28c409520646a4f776eb9b946456a commit d3ee8178e1d28c409520646a4f776eb9b946456a Author: Andreas Arnez <ar...@li...> Date: Wed Feb 15 18:02:37 2023 +0100 Bug 465782 - s390x: Avoid __builtin_setjmp Currently Clang doesn't support __builtin_setjmp() on s390x. Since Valgrind already has an alternate implementation of setjmp/longjmp for many other platforms, just add one for s390x as well, to get rid of this dependency. Diff: --- coregrind/m_libcsetjmp.c | 44 +++++++++++++++++++++++++++++++++++++++++-- include/pub_tool_libcsetjmp.h | 8 ++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/coregrind/m_libcsetjmp.c b/coregrind/m_libcsetjmp.c index 4f1ecb1502..f53a223528 100644 --- a/coregrind/m_libcsetjmp.c +++ b/coregrind/m_libcsetjmp.c @@ -36,9 +36,9 @@ /* See include/pub_tool_libcsetjmp.h for background and rationale. */ -/* The alternative implementations are for ppc{32,64}-linux and +/* The alternative implementations are for s390x-linux, ppc{32,64}-linux, and {amd64,x86}-{linux,darwin,solaris,freebsd}. See #259977. That leaves only - {arm,s390x}-linux using the gcc builtins now. + arm-linux using the gcc builtins now. 
*/ /* ------------ ppc32-linux ------------ */ @@ -741,6 +741,46 @@ __asm__( ); #endif /* VGP_nanomips_linux */ +/* ------------ s390x-linux ------------ */ + +#if defined(VGP_s390x_linux) +__asm__( +".text" "\n" +".align 4" "\n" +".globl VG_MINIMAL_SETJMP" "\n" +".type VG_MINIMAL_SETJMP, @function" "\n" +"VG_MINIMAL_SETJMP:" "\n" +" stmg 6,15,0(2)" "\n" +" std 8,80(2)" "\n" +" std 9,88(2)" "\n" +" std 10,96(2)" "\n" +" std 11,104(2)" "\n" +" std 12,112(2)" "\n" +" std 13,120(2)" "\n" +" std 14,128(2)" "\n" +" std 15,136(2)" "\n" +// return zero +" lghi 2,0" "\n" +" br 14" "\n" + +".align 4" "\n" +".globl VG_MINIMAL_LONGJMP" "\n" +".type VG_MINIMAL_LONGJMP, @function" "\n" +"VG_MINIMAL_LONGJMP:" "\n" +" lmg 6,15,0(2)" "\n" +" ld 8,80(2)" "\n" +" ld 9,88(2)" "\n" +" ld 10,96(2)" "\n" +" ld 11,104(2)" "\n" +" ld 12,112(2)" "\n" +" ld 13,120(2)" "\n" +" ld 14,128(2)" "\n" +" ld 15,136(2)" "\n" +// return the argument (nonzero) +" br 14" "\n" +); +#endif /* VGP_s390x_linux */ + /*--------------------------------------------------------------------*/ /*--- end ---*/ /*--------------------------------------------------------------------*/ diff --git a/include/pub_tool_libcsetjmp.h b/include/pub_tool_libcsetjmp.h index 6b278d285b..a3a386f806 100644 --- a/include/pub_tool_libcsetjmp.h +++ b/include/pub_tool_libcsetjmp.h @@ -126,6 +126,14 @@ UWord VG_MINIMAL_SETJMP(VG_MINIMAL_JMP_BUF(_env)); __attribute__((noreturn)) void VG_MINIMAL_LONGJMP(VG_MINIMAL_JMP_BUF(_env)); +#elif defined(VGP_s390x_linux) + +#define VG_MINIMAL_JMP_BUF(_name) ULong _name [10 + 8] +__attribute__((returns_twice)) +UWord VG_MINIMAL_SETJMP(VG_MINIMAL_JMP_BUF(_env)); +__attribute__((noreturn)) +void VG_MINIMAL_LONGJMP(VG_MINIMAL_JMP_BUF(_env)); + #else /* The default implementation. */ |