https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=2b635fd318b6920250338cef6af15011b50a03f2
commit 2b635fd318b6920250338cef6af15011b50a03f2
Author: Andreas Arnez <ar...@li...>
Date: Wed Oct 9 17:10:08 2024 +0200
Bug 493970 - s390x: Drop saving/restoring FPC upon helper call
Saving the FPC before each helper call and restoring it afterwards creates
unnecessary overhead, and it may also not be desirable.
Drop it. Also remove the functions in host_s390_defs.c responsible for
emitting LFPC and STFPC instructions. And since this frees up the FPC
save slot on the stack, adjust the stack layout accordingly.
Diff:
---
NEWS | 1 +
VEX/priv/host_s390_defs.c | 41 +----------------------------------------
VEX/pub/libvex_s390x_common.h | 9 +++------
3 files changed, 5 insertions(+), 46 deletions(-)
diff --git a/NEWS b/NEWS
index 59b987d633..319caf68f8 100644
--- a/NEWS
+++ b/NEWS
@@ -70,6 +70,7 @@ are not entered into bugzilla tend to get forgotten about or ignored.
493454 Missing FUSE_COMPATIBLE_MAY_BLOCK markers
493507 direct readlink syscall from PRE handler is incompatible with
FUSE_COMPATIBLE_MAY_BLOCK
+493970 s390x: Store/restore FPC upon helper call causes slowdown
n-i-bz Improve messages for sigaltstack errors, use specific
stack_t member names
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
index ee240347d6..583f116c3c 100644
--- a/VEX/priv/host_s390_defs.c
+++ b/VEX/priv/host_s390_defs.c
@@ -1706,18 +1706,6 @@ emit_RXY(UChar *p, ULong op, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2
}
-static UChar *
-emit_S(UChar *p, UInt op, UChar b2, UShort d2)
-{
- ULong the_insn = op;
-
- the_insn |= ((ULong)b2) << 12;
- the_insn |= ((ULong)d2) << 0;
-
- return emit_4bytes(p, the_insn);
-}
-
-
static UChar *
emit_SI(UChar *p, UInt op, UChar i2, UChar b1, UShort d1)
{
@@ -3791,16 +3779,6 @@ s390_emit_LDY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
}
-static UChar *
-s390_emit_LFPC(UChar *p, UChar b2, UShort d2)
-{
- if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
- s390_disasm(ENC2(MNM, UDXB), "lfpc", d2, 0, b2);
-
- return emit_S(p, 0xb29d0000, b2, d2);
-}
-
-
static UChar *
s390_emit_LDGR(UChar *p, UChar r1, UChar r2)
{
@@ -3895,16 +3873,6 @@ s390_emit_STDY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
}
-static UChar *
-s390_emit_STFPC(UChar *p, UChar b2, UShort d2)
-{
- if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
- s390_disasm(ENC2(MNM, UDXB), "stfpc", d2, 0, b2);
-
- return emit_S(p, 0xb29c0000, b2, d2);
-}
-
-
static UChar *
s390_emit_AEBR(UChar *p, UChar r1, UChar r2)
{
@@ -10354,14 +10322,7 @@ s390_insn_helper_call_emit(UChar *buf, const s390_insn *insn)
Also, need to arrange for the return address be put into the
link-register */
buf = s390_emit_load_64imm(buf, 1, target);
-
- /* Stash away the client's FPC register because the helper might change it. */
- buf = s390_emit_STFPC(buf, S390_REGNO_STACK_POINTER, S390_OFFSET_SAVED_FPC_C);
-
- buf = s390_emit_BASR(buf, S390_REGNO_LINK_REGISTER, 1); // call helper
-
- buf = s390_emit_LFPC(buf, S390_REGNO_STACK_POINTER, // restore FPC
- S390_OFFSET_SAVED_FPC_C);
+ buf = s390_emit_BASR(buf, S390_REGNO_LINK_REGISTER, 1);
// preElse:
UChar* pPreElse = buf;
diff --git a/VEX/pub/libvex_s390x_common.h b/VEX/pub/libvex_s390x_common.h
index 944347242d..dda5ffd312 100644
--- a/VEX/pub/libvex_s390x_common.h
+++ b/VEX/pub/libvex_s390x_common.h
@@ -52,10 +52,7 @@
/* Dispatcher will save 8 FPRs at offsets 160 + 0 ... 160 + 56 */
/* Where the dispatcher saves the r2 contents. */
-#define S390_OFFSET_SAVED_R2 160+80
-
-/* Where client's FPC register is saved. */
-#define S390_OFFSET_SAVED_FPC_C 160+72
+#define S390_OFFSET_SAVED_R2 160+72
/* Where valgrind's FPC register is saved. */
#define S390_OFFSET_SAVED_FPC_V 160+64
@@ -64,11 +61,11 @@
Need size for
8 FPRs
+ 1 GPR (SAVED_R2)
- + 2 FPCs (SAVED_FPC_C and SAVED_FPC_V).
+ + 1 FPC (SAVED_FPC_V)
Additionally, we need a standard frame for helper functions being called
from client code. (See figure 1-16 in zSeries ABI) */
-#define S390_INNERLOOP_FRAME_SIZE ((8+1+2)*8 + 160)
+#define S390_INNERLOOP_FRAME_SIZE ((8+1+1)*8 + 160)
/*--------------------------------------------------------------*/
|