|
From: <sv...@va...> - 2012-04-22 17:38:55
|
florian 2012-04-22 18:38:46 +0100 (Sun, 22 Apr 2012)
New Revision: 2308
Log:
tchain optimisation for s390 (VEX bits)
Loading a 64-bit immediate into a register requires 4 insns on a
z900 machine, the oldest model supported. Depending on hardware
capabilities, newer machines can do the same using 2 insns.
Naturally, we want to take advantage of that.
However, currently, in disp_cp_chain_me_to_slowEP/fastEP we assume that
the length of loading a 64-bit immediate is a compile time constant:
S390_TCHAIN_LOAD64_LEN
For what we want to do this constant needs to be a runtime constant.
So in this patch we move this address arithmetic out of the dispatch
code. The general idea being that the value in %r1 does not need to
be adjusted to recover the place to patch. Upon reaching
disp_cp_chain_me_to_slowEP/fastEP %r1 contains the correct address.
Modified files:
trunk/priv/host_s390_defs.c
trunk/pub/libvex_s390x_common.h
Modified: trunk/pub/libvex_s390x_common.h (+0 -6)
===================================================================
--- trunk/pub/libvex_s390x_common.h 2012-04-22 04:48:52 +01:00 (rev 2307)
+++ trunk/pub/libvex_s390x_common.h 2012-04-22 18:38:46 +01:00 (rev 2308)
@@ -87,12 +87,6 @@
/* Number of double words needed to store all facility bits. */
#define S390_NUM_FACILITY_DW 2
-/* The length of the instructions issued by s390_tchain_load64 */
-#define S390_TCHAIN_LOAD64_LEN 16
-
-/* The length of the call insn (BASR) used in translation chaining */
-#define S390_TCHAIN_CALL_LEN 2
-
#endif /* __LIBVEX_PUB_S390X_H */
/*--------------------------------------------------------------------*/
Modified: trunk/priv/host_s390_defs.c (+44 -30)
===================================================================
--- trunk/priv/host_s390_defs.c 2012-04-22 04:48:52 +01:00 (rev 2307)
+++ trunk/priv/host_s390_defs.c 2012-04-22 18:38:46 +01:00 (rev 2308)
@@ -7230,12 +7230,11 @@
return p[0] == 0x07 && p[1] == (0xF0 | reg); /* BCR 15,reg */
}
-static __inline__ Bool
-s390_insn_is_BASR(const UChar *p, UChar link_reg, UChar other_reg)
-{
- return p[0] == 0x0D && p[1] == ((link_reg << 4) | other_reg);
-}
+/* The length of the BASR insn */
+#define S390_BASR_LEN 2
+
+
/* Load the 64-bit VALUE into REG. Note that this function must NOT
optimise the generated code by looking at the value. I.e. using
LGHI if value == 0 would be very wrong.
@@ -7262,7 +7261,7 @@
static UInt
s390_tchain_load64_len(void)
{
- return S390_TCHAIN_LOAD64_LEN;
+ return 4 + 4 + 4 + 4; /* IIHH + IIHL + IILH + IILL */
}
/* Verify that CODE is the code sequence generated by s390_tchain_load64
@@ -7367,24 +7366,23 @@
buf = s390_emit_STG(buf, R0, 0, b, DISP20(d));
- /* --- FIRST PATCHABLE BYTE follows --- */
- /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
- to) backs up the return address, so as to find the address of
- the first patchable byte. So: don't change the length of the
- two instructions below. */
-
/* Load the chosen entry point into the scratch reg */
void *disp_cp_chain_me;
disp_cp_chain_me =
insn->variant.xdirect.to_fast_entry ? disp_cp_chain_me_to_fastEP
: disp_cp_chain_me_to_slowEP;
+ /* Get the address of the beginning of the load64 code sequence into %r1.
+ Do not change the register! This is part of the protocol with the
+ dispatcher. */
+ buf = s390_emit_BASR(buf, 1, R0);
+ /* --- FIRST PATCHABLE BYTE follows (must not modify %r1) --- */
ULong addr = Ptr_to_ULong(disp_cp_chain_me);
buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH, addr);
- /* call *tchain_scratch */
- buf = s390_emit_BASR(buf, 1, S390_REGNO_TCHAIN_SCRATCH);
+ /* goto *tchain_scratch */
+ buf = s390_emit_BCR(buf, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH);
/* --- END of PATCHABLE BYTES --- */
@@ -7404,7 +7402,7 @@
static UInt
s390_xdirect_patchable_len(void)
{
- return s390_tchain_load64_len() + S390_TCHAIN_CALL_LEN;
+ return s390_tchain_load64_len() + S390_BASR_LEN;
}
@@ -7452,7 +7450,7 @@
/* load tchain_scratch, #disp_indir */
buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH,
Ptr_to_ULong(disp_cp_xindir));
- /* BR *tchain_direct */
+ /* goto *tchain_direct */
buf = s390_emit_BCR(buf, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH);
/* Fix up the conditional jump, if there was one. */
@@ -7533,7 +7531,7 @@
buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH,
Ptr_to_ULong(disp_cp_xassisted));
- /* BR *tchain_direct */
+ /* goto *tchain_direct */
buf = s390_emit_BCR(buf, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH);
/* Fix up the conditional jump, if there was one. */
@@ -7808,21 +7806,21 @@
void *disp_cp_chain_me_EXPECTED,
void *place_to_jump_to)
{
- /* What we're expecting to see @ PLACE_TI_CHAIN is:
+ /* What we're expecting to see @ PLACE_TO_CHAIN is:
- load tchain-scratch, #disp_cp_chain_me_EXPECTED
- BASR 1,S390_REGNO_TCHAIN_SCRATCH
+ load tchain_scratch, #disp_cp_chain_me_EXPECTED
+ goto *tchain_scratch
*/
const UChar *next;
next = s390_tchain_verify_load64(place_to_chain, S390_REGNO_TCHAIN_SCRATCH,
Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
- vassert(s390_insn_is_BASR(next, 1, S390_REGNO_TCHAIN_SCRATCH));
+ vassert(s390_insn_is_BR(next, S390_REGNO_TCHAIN_SCRATCH));
/* And what we want to change it to is either:
(general case):
- load tchain_scratch, #place_to_jump_to
- BR *tchain_scratch
+ load tchain_scratch, #place_to_jump_to
+ goto *tchain_scratch
---OR---
@@ -7872,12 +7870,12 @@
p[i] = 0x00;
} else {
/*
- load tchain_scratch, #place_to_jump_to
- BR *tchain_scratch
+ load tchain_scratch, #place_to_jump_to
+ goto *tchain_scratch
*/
ULong addr = Ptr_to_ULong(place_to_jump_to);
p = s390_tchain_load64(p, S390_REGNO_TCHAIN_SCRATCH, addr);
- s390_emit_BCR(p, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH);
+ /* There is no need to emit a BCR here, as it is already there. */
}
VexInvalRange vir = {0, 0};
@@ -7894,8 +7892,8 @@
{
/* What we're expecting to see @ PLACE_TO_UNCHAIN:
- load tchain_scratch, #place_to_jump_to_EXPECTED
- BR *tchain_scratch
+ load tchain_scratch, #place_to_jump_to_EXPECTED
+ goto *tchain_scratch
---OR---
in the case where the displacement falls within 32 bits
@@ -7905,6 +7903,8 @@
*/
UChar *p = place_to_unchain;
+ Bool uses_short_form = False;
+
if (s390_insn_is_BRCL(p, S390_CC_ALWAYS)) {
/* Looks like the short form */
Int num_hw = *(Int *)&p[2];
@@ -7915,6 +7915,7 @@
Int i;
for (i = 0; i < s390_xdirect_patchable_len() - 6; ++i)
vassert(p[6+i] == 0x00);
+ uses_short_form = True;
} else {
/* Should be the long form */
const UChar *next;
@@ -7928,12 +7929,25 @@
/* And what we want to change it to is:
load tchain_scratch, #disp_cp_chain_me
- call *tchain_scratch
+ goto *tchain_scratch
*/
+
+ /* Get the address of the beginning of the load64 code sequence into %r1.
+ Do not change the register! This is part of the protocol with the
+ dispatcher.
+ Note: the incoming argument PLACE_TO_UNCHAIN points to the beginning of the
+ load64 insn sequence. That sequence is prefixed with a BASR to get its
+ address (see s390_insn_xdirect_emit). */
+ p = s390_emit_BASR(p - S390_BASR_LEN, 1, R0);
+
ULong addr = Ptr_to_ULong(disp_cp_chain_me);
p = s390_tchain_load64(p, S390_REGNO_TCHAIN_SCRATCH, addr);
- s390_emit_BASR(p, 1, S390_REGNO_TCHAIN_SCRATCH);
+ /* Emit the BCR in case the short form was used. In case of the long
+ form, the BCR is already there. */
+ if (uses_short_form)
+ s390_emit_BCR(p, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH);
+
VexInvalRange vir = {0, 0};
return vir;
}
|