|
From: <sv...@va...> - 2012-04-20 23:59:04
|
sewardj 2012-04-21 00:58:55 +0100 (Sat, 21 Apr 2012)
New Revision: 12517
Log:
Merge branches/TCHAIN from r12476 (its creation point) into trunk.
Copied files:
trunk/docs/internals/t-chaining-notes.txt
(from rev 12516, branches/TCHAIN/docs/internals/t-chaining-notes.txt)
Modified directories:
trunk/
Modified files:
trunk/cachegrind/cg-x86-amd64.c
trunk/callgrind/docs/callgrind_annotate-manpage.xml
trunk/callgrind/docs/callgrind_control-manpage.xml
trunk/coregrind/m_coredump/coredump-macho.c
trunk/coregrind/m_dispatch/dispatch-amd64-linux.S
trunk/coregrind/m_dispatch/dispatch-arm-linux.S
trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S
trunk/coregrind/m_dispatch/dispatch-ppc64-linux.S
trunk/coregrind/m_dispatch/dispatch-s390x-linux.S
trunk/coregrind/m_dispatch/dispatch-x86-linux.S
trunk/coregrind/m_libcproc.c
trunk/coregrind/m_main.c
trunk/coregrind/m_scheduler/scheduler.c
trunk/coregrind/m_translate.c
trunk/coregrind/m_transtab.c
trunk/coregrind/m_xarray.c
trunk/coregrind/pub_core_dispatch.h
trunk/coregrind/pub_core_dispatch_asm.h
trunk/coregrind/pub_core_libcproc.h
trunk/coregrind/pub_core_transtab.h
trunk/coregrind/pub_core_transtab_asm.h
trunk/docs/Makefile.am
trunk/docs/xml/design-impl.xml
trunk/drd/drd_load_store.c
trunk/drd/tests/atomic_var.stderr.exp
trunk/drd/tests/circular_buffer.stderr.exp
trunk/drd/tests/tc23_bogus_condwait.stderr.exp-linux-ppc
trunk/drd/tests/tc23_bogus_condwait.stderr.exp-linux-x86
trunk/drd/tests/unit_bitmap.c
trunk/drd/tests/unit_bitmap.stderr.exp
trunk/drd/tests/unit_bitmap.vgtest
trunk/glibc-2.X.supp.in
trunk/helgrind/hg_main.c
trunk/include/pub_tool_xarray.h
trunk/memcheck/mc_main.c
trunk/memcheck/tests/unit_oset.c
trunk/mpi/Makefile.am
trunk/mpi/libmpiwrap.c
trunk/mpi/mpiwrap_type_test.c
Modified: trunk/
Property changed: trunk/mpi/Makefile.am (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Property changed: trunk/drd/tests/unit_bitmap.vgtest (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Modified: trunk/coregrind/pub_core_dispatch_asm.h (+9 -5)
===================================================================
--- trunk/coregrind/pub_core_dispatch_asm.h 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/coregrind/pub_core_dispatch_asm.h 2012-04-21 00:58:55 -23:00 (rev 12517)
@@ -43,16 +43,20 @@
/* And some more of our own. These must not have the same values as
those from libvex_trc_values.h. (viz, 60 or below is safe).
+ (The following comment is no longer relevant, but is retained
+ for historical purposes.)
These values *must* be odd (have bit 0 set) because the dispatchers
(coregrind/m_dispatch/dispatch-*-*.S) use this fact to distinguish
a TRC value from the unchanged baseblock pointer -- which has 0 as
its lowest bit.
*/
-#define VG_TRC_BORING 29 /* no event; just keep going */
-#define VG_TRC_INNER_FASTMISS 37 /* TRC only; means fast-cache miss. */
-#define VG_TRC_INNER_COUNTERZERO 41 /* TRC only; means bb ctr == 0 */
-#define VG_TRC_FAULT_SIGNAL 43 /* TRC only; got sigsegv/sigbus */
-#define VG_TRC_INVARIANT_FAILED 47 /* TRC only; invariant violation */
+#define VG_TRC_BORING 29 /* no event; just keep going */
+#define VG_TRC_INNER_FASTMISS 37 /* TRC only; means fast-cache miss. */
+#define VG_TRC_INNER_COUNTERZERO 41 /* TRC only; means bb ctr == 0 */
+#define VG_TRC_FAULT_SIGNAL 43 /* TRC only; got sigsegv/sigbus */
+#define VG_TRC_INVARIANT_FAILED 47 /* TRC only; invariant violation */
+#define VG_TRC_CHAIN_ME_TO_SLOW_EP 49 /* TRC only; chain to slow EP */
+#define VG_TRC_CHAIN_ME_TO_FAST_EP 51 /* TRC only; chain to fast EP */
#endif // __PUB_CORE_DISPATCH_ASM_H
Modified: trunk/coregrind/m_xarray.c (+14 -0)
===================================================================
--- trunk/coregrind/m_xarray.c 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/coregrind/m_xarray.c 2012-04-21 00:58:55 -23:00 (rev 12517)
@@ -311,6 +311,20 @@
xa->usedsizeE -= n;
}
+void VG_(removeIndexXA)( XArray* xao, Word n )
+{
+ struct _XArray* xa = (struct _XArray*)xao;
+ vg_assert(xa);
+ vg_assert(n >= 0);
+ vg_assert(n < xa->usedsizeE);
+ if (n+1 < xa->usedsizeE) {
+ VG_(memmove)( ((char*)xa->arr) + (n+0) * xa->elemSzB,
+ ((char*)xa->arr) + (n+1) * xa->elemSzB,
+ (xa->usedsizeE - n - 1) * xa->elemSzB );
+ }
+ xa->usedsizeE--;
+}
+
void VG_(getContentsXA_UNSAFE)( XArray* xao,
/*OUT*/void** ctsP,
/*OUT*/Word* usedP )
Modified: trunk/coregrind/m_translate.c (+55 -56)
===================================================================
--- trunk/coregrind/m_translate.c 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/coregrind/m_translate.c 2012-04-21 00:58:55 -23:00 (rev 12517)
@@ -280,6 +280,7 @@
bb->tyenv = deepCopyIRTypeEnv(sb_in->tyenv);
bb->next = deepCopyIRExpr(sb_in->next);
bb->jumpkind = sb_in->jumpkind;
+ bb->offsIP = sb_in->offsIP;
delta = 0;
@@ -905,6 +906,7 @@
Int offB_REDIR_SP = offsetof(VexGuestPPC64State,guest_REDIR_SP);
Int offB_REDIR_STACK = offsetof(VexGuestPPC64State,guest_REDIR_STACK);
Int offB_EMWARN = offsetof(VexGuestPPC64State,guest_EMWARN);
+ Int offB_CIA = offsetof(VexGuestPPC64State,guest_CIA);
Bool is64 = True;
IRType ty_Word = Ity_I64;
IROp op_CmpNE = Iop_CmpNE64;
@@ -918,6 +920,7 @@
Int offB_REDIR_SP = offsetof(VexGuestPPC32State,guest_REDIR_SP);
Int offB_REDIR_STACK = offsetof(VexGuestPPC32State,guest_REDIR_STACK);
Int offB_EMWARN = offsetof(VexGuestPPC32State,guest_EMWARN);
+ Int offB_CIA = offsetof(VexGuestPPC32State,guest_CIA);
Bool is64 = False;
IRType ty_Word = Ity_I32;
IROp op_CmpNE = Iop_CmpNE32;
@@ -969,7 +972,8 @@
mkU(0)
),
Ijk_EmFail,
- is64 ? IRConst_U64(0) : IRConst_U32(0)
+ is64 ? IRConst_U64(0) : IRConst_U32(0),
+ offB_CIA
)
);
@@ -996,6 +1000,7 @@
Int offB_REDIR_SP = offsetof(VexGuestPPC64State,guest_REDIR_SP);
Int offB_REDIR_STACK = offsetof(VexGuestPPC64State,guest_REDIR_STACK);
Int offB_EMWARN = offsetof(VexGuestPPC64State,guest_EMWARN);
+ Int offB_CIA = offsetof(VexGuestPPC64State,guest_CIA);
Bool is64 = True;
IRType ty_Word = Ity_I64;
IROp op_CmpNE = Iop_CmpNE64;
@@ -1007,6 +1012,7 @@
Int offB_REDIR_SP = offsetof(VexGuestPPC32State,guest_REDIR_SP);
Int offB_REDIR_STACK = offsetof(VexGuestPPC32State,guest_REDIR_STACK);
Int offB_EMWARN = offsetof(VexGuestPPC32State,guest_EMWARN);
+ Int offB_CIA = offsetof(VexGuestPPC32State,guest_CIA);
Bool is64 = False;
IRType ty_Word = Ity_I32;
IROp op_CmpNE = Iop_CmpNE32;
@@ -1048,7 +1054,8 @@
mkU(0)
),
Ijk_EmFail,
- is64 ? IRConst_U64(0) : IRConst_U32(0)
+ is64 ? IRConst_U64(0) : IRConst_U32(0),
+ offB_CIA
)
);
@@ -1099,6 +1106,7 @@
# if defined(VGP_ppc64_linux)
Int offB_GPR2 = offsetof(VexGuestPPC64State,guest_GPR2);
Int offB_LR = offsetof(VexGuestPPC64State,guest_LR);
+ Int offB_CIA = offsetof(VexGuestPPC64State,guest_CIA);
IRTemp old_R2 = newIRTemp( bb->tyenv, Ity_I64 );
IRTemp old_LR = newIRTemp( bb->tyenv, Ity_I64 );
/* Restore R2 */
@@ -1112,8 +1120,8 @@
blr (hence Ijk_Ret); so we should just mark this jump as Boring,
else one _Call will have resulted in two _Rets. */
bb->jumpkind = Ijk_Boring;
- bb->next = IRExpr_Binop(Iop_And64, IRExpr_RdTmp(old_LR), mkU64(~(3ULL)));
-
+ bb->next = IRExpr_Binop(Iop_And64, IRExpr_RdTmp(old_LR), mkU64(~(3ULL)));
+ bb->offsIP = offB_CIA;
# else
# error Platform is not TOC-afflicted, fortunately
# endif
@@ -1348,7 +1356,7 @@
}
vg_assert(objname);
VG_(printf)(
- "==== SB %d (exec'd %lld) [tid %d] 0x%llx %s %s+0x%llx\n",
+ "==== SB %d (evchecks %lld) [tid %d] 0x%llx %s %s+0x%llx\n",
VG_(get_bbs_translated)(), bbs_done, (Int)tid, addr,
fnname, objname, (ULong)objoff
);
@@ -1461,11 +1469,10 @@
vta.arch_host = vex_arch;
vta.archinfo_host = vex_archinfo;
vta.abiinfo_both = vex_abiinfo;
+ vta.callback_opaque = (void*)&closure;
vta.guest_bytes = (UChar*)ULong_to_Ptr(addr);
vta.guest_bytes_addr = (Addr64)addr;
- vta.callback_opaque = (void*)&closure;
vta.chase_into_ok = chase_into_ok;
- vta.preamble_function = preamble_fn;
vta.guest_extents = &vge;
vta.host_bytes = tmpbuf;
vta.host_bytes_size = N_TMPBUF;
@@ -1486,60 +1493,49 @@
IRSB*,VexGuestLayout*,VexGuestExtents*,
IRType,IRType)
= (IRSB*(*)(void*,IRSB*,VexGuestLayout*,VexGuestExtents*,IRType,IRType))f;
- vta.instrument1 = g;
+ vta.instrument1 = g;
}
/* No need for type kludgery here. */
- vta.instrument2 = need_to_handle_SP_assignment()
- ? vg_SP_update_pass
- : NULL;
- vta.finaltidy = VG_(needs).final_IR_tidy_pass
- ? VG_(tdict).tool_final_IR_tidy_pass
- : NULL;
- vta.needs_self_check = needs_self_check;
- vta.traceflags = verbosity;
+ vta.instrument2 = need_to_handle_SP_assignment()
+ ? vg_SP_update_pass
+ : NULL;
+ vta.finaltidy = VG_(needs).final_IR_tidy_pass
+ ? VG_(tdict).tool_final_IR_tidy_pass
+ : NULL;
+ vta.needs_self_check = needs_self_check;
+ vta.preamble_function = preamble_fn;
+ vta.traceflags = verbosity;
+ vta.addProfInc = VG_(clo_profile_flags) > 0
+ && kind != T_NoRedir;
- /* Set up the dispatch-return info. For archs without a link
- register, vex generates a jump back to the specified dispatch
- address. Else, it just generates a branch-to-LR. */
+ /* Set up the dispatch continuation-point info. If this is a
+ no-redir translation then it cannot be chained, and the chain-me
+ points are set to NULL to indicate that. The indir point must
+ also be NULL, since we can't allow this translation to do an
+ indir transfer -- that would take it back into the main
+ translation cache too.
-# if defined(VGA_x86) || defined(VGA_amd64)
- if (!allow_redirection) {
- /* It's a no-redir translation. Will be run with the
- nonstandard dispatcher VG_(run_a_noredir_translation) and so
- needs a nonstandard return point. */
- vta.dispatch_assisted
- = (void*) &VG_(run_a_noredir_translation__return_point);
- vta.dispatch_unassisted
- = vta.dispatch_assisted;
+ All this is because no-redir translations live outside the main
+ translation cache (in a secondary one) and chaining them would
+ involve more administrative complexity that isn't worth the
+ hassle, because we don't expect them to get used often. So
+ don't bother. */
+ if (allow_redirection) {
+ vta.disp_cp_chain_me_to_slowEP
+ = VG_(fnptr_to_fnentry)( &VG_(disp_cp_chain_me_to_slowEP) );
+ vta.disp_cp_chain_me_to_fastEP
+ = VG_(fnptr_to_fnentry)( &VG_(disp_cp_chain_me_to_fastEP) );
+ vta.disp_cp_xindir
+ = VG_(fnptr_to_fnentry)( &VG_(disp_cp_xindir) );
+ } else {
+ vta.disp_cp_chain_me_to_slowEP = NULL;
+ vta.disp_cp_chain_me_to_fastEP = NULL;
+ vta.disp_cp_xindir = NULL;
}
- else
- if (VG_(clo_profile_flags) > 0) {
- /* normal translation; although we're profiling. */
- vta.dispatch_assisted
- = (void*) &VG_(run_innerloop__dispatch_assisted_profiled);
- vta.dispatch_unassisted
- = (void*) &VG_(run_innerloop__dispatch_unassisted_profiled);
- }
- else {
- /* normal translation and we're not profiling (the normal case) */
- vta.dispatch_assisted
- = (void*) &VG_(run_innerloop__dispatch_assisted_unprofiled);
- vta.dispatch_unassisted
- = (void*) &VG_(run_innerloop__dispatch_unassisted_unprofiled);
- }
+ /* This doesn't involve chaining and so is always allowable. */
+ vta.disp_cp_xassisted
+ = VG_(fnptr_to_fnentry)( &VG_(disp_cp_xassisted) );
-# elif defined(VGA_ppc32) || defined(VGA_ppc64) \
- || defined(VGA_arm) || defined(VGA_s390x)
- /* See comment in libvex.h. This target uses a
- return-to-link-register scheme to get back to the dispatcher, so
- both fields are NULL. */
- vta.dispatch_assisted = NULL;
- vta.dispatch_unassisted = NULL;
-
-# else
-# error "Unknown arch"
-# endif
-
/* Sheesh. Finally, actually _do_ the translation! */
tres = LibVEX_Translate ( &vta );
@@ -1581,8 +1577,11 @@
nraddr,
(Addr)(&tmpbuf[0]),
tmpbuf_used,
- tres.n_sc_extents > 0 );
+ tres.n_sc_extents > 0,
+ tres.offs_profInc,
+ vex_arch );
} else {
+ vg_assert(tres.offs_profInc == -1); /* -1 == unset */
VG_(add_to_unredir_transtab)( &vge,
nraddr,
(Addr)(&tmpbuf[0]),
Property changed: trunk/mpi/mpiwrap_type_test.c (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Property changed: trunk/mpi/libmpiwrap.c (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Modified: trunk/memcheck/tests/unit_oset.c (+1 -0)
===================================================================
--- trunk/memcheck/tests/unit_oset.c 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/memcheck/tests/unit_oset.c 2012-04-21 00:58:55 -23:00 (rev 12517)
@@ -27,6 +27,7 @@
#define vgPlain_printf printf
#define vgPlain_memset memset
#define vgPlain_memcpy memcpy
+#define vgPlain_memmove memmove
// Crudely replace some functions (in m_xarray.c, but not needed for
// this unit test) by (hopefully) failing asserts.
Property changed: trunk/drd/tests/circular_buffer.stderr.exp (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Modified: trunk/coregrind/pub_core_libcproc.h (+4 -0)
===================================================================
--- trunk/coregrind/pub_core_libcproc.h 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/coregrind/pub_core_libcproc.h 2012-04-21 00:58:55 -23:00 (rev 12517)
@@ -84,6 +84,10 @@
extern void VG_(do_atfork_parent) ( ThreadId tid );
extern void VG_(do_atfork_child) ( ThreadId tid );
+// icache invalidation
+extern void VG_(invalidate_icache) ( void *ptr, SizeT nbytes );
+
+
#endif // __PUB_CORE_LIBCPROC_H
/*--------------------------------------------------------------------*/
Modified: trunk/coregrind/m_dispatch/dispatch-arm-linux.S (+125 -203)
===================================================================
--- trunk/coregrind/m_dispatch/dispatch-arm-linux.S 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/coregrind/m_dispatch/dispatch-arm-linux.S 2012-04-21 00:58:55 -23:00 (rev 12517)
@@ -1,3 +1,4 @@
+
/*--------------------------------------------------------------------*/
/*--- The core dispatch loop, for jumping to a code address. ---*/
/*--- dispatch-arm-linux.S ---*/
@@ -39,127 +40,128 @@
/*------------------------------------------------------------*/
/*--- ---*/
-/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
-/*--- run all translations except no-redir ones. ---*/
+/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
+/*--- used to run all translations, ---*/
+/*--- including no-redir ones. ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/*----------------------------------------------------*/
-/*--- Preamble (set everything up) ---*/
+/*--- Entry and preamble (set everything up) ---*/
/*----------------------------------------------------*/
/* signature:
-UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+void VG_(disp_run_translations)( UWord* two_words,
+ void* guest_state,
+ Addr host_addr );
*/
.text
-.globl VG_(run_innerloop)
-VG_(run_innerloop):
- push {r0, r1, r4, r5, r6, r7, r8, r9, fp, lr}
+.global VG_(disp_run_translations)
+VG_(disp_run_translations):
+ /* r0 holds two_words
+ r1 holds guest_state
+ r2 holds host_addr
+ */
+ /* The number of regs in this list needs to be even, in
+ order to keep the stack 8-aligned. */
+ push {r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
/* set FPSCR to vex-required default value */
mov r4, #0
fmxr fpscr, r4
- /* r0 (hence also [sp,#0]) holds guest_state */
- /* r1 holds do_profiling */
- mov r8, r0
- ldr r0, [r8, #OFFSET_arm_R15T]
+ /* Set up the guest state pointer */
+ mov r8, r1
+
+ /* and jump into the code cache. Chained translations in
+ the code cache run, until for whatever reason, they can't
+ continue. When that happens, the translation in question
+ will jump (or call) to one of the continuation points
+ VG_(disp_cp_...) below. */
+ bx r2
+ /* NOTREACHED */
- /* fall into main loop (the right one) */
- cmp r1, #0 /* do_profiling */
- beq VG_(run_innerloop__dispatch_unprofiled)
- b VG_(run_innerloop__dispatch_profiled)
-
-
/*----------------------------------------------------*/
-/*--- NO-PROFILING (standard) dispatcher ---*/
+/*--- Postamble and exit. ---*/
/*----------------------------------------------------*/
-/* Pairing of insns below is my guesstimate of how dual dispatch would
- work on an A8. JRS, 2011-May-28 */
-
-.global VG_(run_innerloop__dispatch_unprofiled)
-VG_(run_innerloop__dispatch_unprofiled):
+postamble:
+ /* At this point, r1 and r2 contain two
+ words to be returned to the caller. r1
+ holds a TRC value, and r2 optionally may
+ hold another word (for CHAIN_ME exits, the
+ address of the place to patch.) */
- /* AT ENTRY: r0 is next guest addr, r8 is possibly
- modified guest state ptr */
+ /* We're leaving. Check that nobody messed with
+ FPSCR in ways we don't expect. */
+ fmrx r4, fpscr
+ bic r4, #0xF8000000 /* mask out NZCV and QC */
+ bic r4, #0x0000009F /* mask out IDC,IXC,UFC,OFC,DZC,IOC */
+ cmp r4, #0
+ beq remove_frame /* we're OK */
+ /* otherwise we have an invariant violation */
+ movw r1, #VG_TRC_INVARIANT_FAILED
+ movw r2, #0
+ /* fall through */
- /* Has the guest state pointer been messed with? If yes, exit. */
- movw r3, #:lower16:VG_(dispatch_ctr)
- tst r8, #1
+remove_frame:
+ /* Restore int regs, including importantly r0 (two_words) */
+ pop {r0, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+ /* Stash return values */
+ str r1, [r0, #0]
+ str r2, [r0, #4]
+ bx lr
- movt r3, #:upper16:VG_(dispatch_ctr)
-
- bne gsp_changed
-
- /* save the jump address in the guest state */
- str r0, [r8, #OFFSET_arm_R15T]
-
- /* Are we out of timeslice? If yes, defer to scheduler. */
- ldr r2, [r3]
-
- subs r2, r2, #1
-
- str r2, [r3]
-
- beq counter_is_zero
-
- /* try a fast lookup in the translation cache */
- // r0 = next guest, r1,r2,r3,r4 scratch
- movw r1, #VG_TT_FAST_MASK // r1 = VG_TT_FAST_MASK
- movw r4, #:lower16:VG_(tt_fast)
-
- and r2, r1, r0, LSR #1 // r2 = entry #
- movt r4, #:upper16:VG_(tt_fast) // r4 = &VG_(tt_fast)
-
- add r1, r4, r2, LSL #3 // r1 = &tt_fast[entry#]
-
- ldrd r4, r5, [r1, #0] // r4 = .guest, r5 = .host
-
- cmp r4, r0
-
- bne fast_lookup_failed
- // r5: next-host r8: live, gsp
- // r4: next-guest
- // r2: entry #
- // LIVE: r5, r8; all others dead
-
- /* Found a match. Jump to .host. */
- blx r5
- b VG_(run_innerloop__dispatch_unprofiled)
-.ltorg
- /*NOTREACHED*/
-
/*----------------------------------------------------*/
-/*--- PROFILING dispatcher (can be much slower) ---*/
+/*--- Continuation points ---*/
/*----------------------------------------------------*/
-.global VG_(run_innerloop__dispatch_profiled)
-VG_(run_innerloop__dispatch_profiled):
+/* ------ Chain me to slow entry point ------ */
+.global VG_(disp_cp_chain_me_to_slowEP)
+VG_(disp_cp_chain_me_to_slowEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and exit back to C land,
+ handing the caller the pair (Chain_me_S, RA) */
+ mov r1, #VG_TRC_CHAIN_ME_TO_SLOW_EP
+ mov r2, lr
+ /* 4 = movw r12, lo16(disp_cp_chain_me_to_slowEP)
+ 4 = movt r12, hi16(disp_cp_chain_me_to_slowEP)
+ 4 = blx r12 */
+ sub r2, r2, #4+4+4
+ b postamble
- /* AT ENTRY: r0 is next guest addr, r8 is possibly
- modified guest state ptr */
+/* ------ Chain me to fast entry point ------ */
+.global VG_(disp_cp_chain_me_to_fastEP)
+VG_(disp_cp_chain_me_to_fastEP):
+ /* We got called. The return address indicates
+ where the patching needs to happen. Collect
+ the return address and exit back to C land,
+ handing the caller the pair (Chain_me_F, RA) */
+ mov r1, #VG_TRC_CHAIN_ME_TO_FAST_EP
+ mov r2, lr
+ /* 4 = movw r12, lo16(disp_cp_chain_me_to_fastEP)
+ 4 = movt r12, hi16(disp_cp_chain_me_to_fastEP)
+ 4 = blx r12 */
+ sub r2, r2, #4+4+4
+ b postamble
- /* Has the guest state pointer been messed with? If yes, exit. */
- movw r3, #:lower16:VG_(dispatch_ctr)
- tst r8, #1
+/* ------ Indirect but boring jump ------ */
+.global VG_(disp_cp_xindir)
+VG_(disp_cp_xindir):
+ /* Where are we going? */
+ ldr r0, [r8, #OFFSET_arm_R15T]
- movt r3, #:upper16:VG_(dispatch_ctr)
-
- bne gsp_changed
-
- /* save the jump address in the guest state */
- str r0, [r8, #OFFSET_arm_R15T]
-
- /* Are we out of timeslice? If yes, defer to scheduler. */
- ldr r2, [r3]
-
- subs r2, r2, #1
-
- str r2, [r3]
-
- beq counter_is_zero
-
+ /* RM ME -- stats only */
+ movw r1, #:lower16:vgPlain_stats__n_xindirs
+ movt r1, #:upper16:vgPlain_stats__n_xindirs
+ ldr r2, [r1, #0]
+ adds r2, r2, #1
+ str r2, [r1, #0]
+ ldr r2, [r1, #4]
+ adc r2, r2, #0
+ str r2, [r1, #4]
+
/* try a fast lookup in the translation cache */
// r0 = next guest, r1,r2,r3,r4 scratch
movw r1, #VG_TT_FAST_MASK // r1 = VG_TT_FAST_MASK
@@ -174,122 +176,42 @@
cmp r4, r0
- bne fast_lookup_failed
- // r5: next-host r8: live, gsp
- // r4: next-guest
- // r2: entry #
- // LIVE: r5, r8; all others dead
-
- /* increment bb profile counter */
- movw r0, #:lower16:VG_(tt_fastN)
- movt r0, #:upper16:VG_(tt_fastN) // r0 = &tt_fastN[0]
- ldr r0, [r0, r2, LSL #2] // r0 = tt_fast[entry #]
- ldr r3, [r0] // *r0 ++
- add r3, r3, #1
- str r3, [r0]
+ // jump to host if lookup succeeded
+ bxeq r5
- /* Found a match. Jump to .host. */
- blx r5
- b VG_(run_innerloop__dispatch_profiled)
- /*NOTREACHED*/
+ /* otherwise the fast lookup failed */
+ /* RM ME -- stats only */
+ movw r1, #:lower16:vgPlain_stats__n_xindir_misses
+ movt r1, #:upper16:vgPlain_stats__n_xindir_misses
+ ldr r2, [r1, #0]
+ adds r2, r2, #1
+ str r2, [r1, #0]
+ ldr r2, [r1, #4]
+ adc r2, r2, #0
+ str r2, [r1, #4]
-/*----------------------------------------------------*/
-/*--- exit points ---*/
-/*----------------------------------------------------*/
+ mov r1, #VG_TRC_INNER_FASTMISS
+ mov r2, #0
+ b postamble
-gsp_changed:
- // r0 = next guest addr (R15T), r8 = modified gsp
- /* Someone messed with the gsp. Have to
- defer to scheduler to resolve this. dispatch ctr
- is not yet decremented, so no need to increment. */
- /* R15T is NOT up to date here. First, need to write
- r0 back to R15T, but without trashing r8 since
- that holds the value we want to return to the scheduler.
- Hence use r1 transiently for the guest state pointer. */
- ldr r1, [sp, #0]
- str r0, [r1, #OFFSET_arm_R15T]
- mov r0, r8 // "return modified gsp"
- b run_innerloop_exit
- /*NOTREACHED*/
+/* ------ Assisted jump ------ */
+.global VG_(disp_cp_xassisted)
+VG_(disp_cp_xassisted):
+ /* r8 contains the TRC */
+ mov r1, r8
+ mov r2, #0
+ b postamble
-counter_is_zero:
- /* R15T is up to date here */
- /* Back out increment of the dispatch ctr */
- ldr r1, =VG_(dispatch_ctr)
- ldr r2, [r1]
- add r2, r2, #1
- str r2, [r1]
- mov r0, #VG_TRC_INNER_COUNTERZERO
- b run_innerloop_exit
- /*NOTREACHED*/
-
-fast_lookup_failed:
- /* R15T is up to date here */
- /* Back out increment of the dispatch ctr */
- ldr r1, =VG_(dispatch_ctr)
- ldr r2, [r1]
- add r2, r2, #1
- str r2, [r1]
- mov r0, #VG_TRC_INNER_FASTMISS
- b run_innerloop_exit
- /*NOTREACHED*/
+/* ------ Event check failed ------ */
+.global VG_(disp_cp_evcheck_fail)
+VG_(disp_cp_evcheck_fail):
+ mov r1, #VG_TRC_INNER_COUNTERZERO
+ mov r2, #0
+ b postamble
-/* All exits from the dispatcher go through here. %r0 holds
- the return value.
-*/
-run_innerloop_exit:
- /* We're leaving. Check that nobody messed with
- FPSCR in ways we don't expect. */
- fmrx r4, fpscr
- bic r4, #0xF8000000 /* mask out NZCV and QC */
- bic r4, #0x0000009F /* mask out IDC,IXC,UFC,OFC,DZC,IOC */
- cmp r4, #0
- bne invariant_violation
- b run_innerloop_exit_REALLY
-invariant_violation:
- mov r0, #VG_TRC_INVARIANT_FAILED
- b run_innerloop_exit_REALLY
+.size VG_(disp_run_translations), .-VG_(disp_run_translations)
-run_innerloop_exit_REALLY:
- add sp, sp, #8
- pop {r4, r5, r6, r7, r8, r9, fp, pc}
-
-.size VG_(run_innerloop), .-VG_(run_innerloop)
-
-
-/*------------------------------------------------------------*/
-/*--- ---*/
-/*--- A special dispatcher, for running no-redir ---*/
-/*--- translations. Just runs the given translation once. ---*/
-/*--- ---*/
-/*------------------------------------------------------------*/
-
-/* signature:
-void VG_(run_a_noredir_translation) ( UWord* argblock );
-*/
-
-/* Run a no-redir translation. argblock points to 4 UWords, 2 to carry args
- and 2 to carry results:
- 0: input: ptr to translation
- 1: input: ptr to guest state
- 2: output: next guest PC
- 3: output: guest state pointer afterwards (== thread return code)
-*/
-.global VG_(run_a_noredir_translation)
-VG_(run_a_noredir_translation):
- push {r0,r1 /* EABI compliance */, r4-r12, lr}
- ldr r8, [r0, #4]
- mov lr, pc
- ldr pc, [r0, #0]
-
- pop {r1}
- str r0, [r1, #8]
- str r8, [r1, #12]
- pop {r1/*EABI compliance*/,r4-r12, pc}
-
-.size VG_(run_a_noredir_translation), .-VG_(run_a_noredir_translation)
-
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",%progbits
Property changed: trunk/callgrind/docs/callgrind_annotate-manpage.xml (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Property changed: trunk/drd/tests/tc23_bogus_condwait.stderr.exp-linux-x86 (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Copied: trunk/docs/internals/t-chaining-notes.txt (+0 -0)
===================================================================
Property changed: trunk/coregrind/m_coredump/coredump-macho.c (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Property changed: trunk/drd/tests/tc23_bogus_condwait.stderr.exp-linux-ppc (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Property changed: trunk/drd/tests/atomic_var.stderr.exp (+0 -0)
___________________________________________________________________
Name: svn:mergeinfo
-
Modified: trunk/docs/Makefile.am (+1 -0)
===================================================================
--- trunk/docs/Makefile.am 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/docs/Makefile.am 2012-04-21 00:58:55 -23:00 (rev 12517)
@@ -44,6 +44,7 @@
internals/register-uses.txt \
internals/release-HOWTO.txt \
internals/segments-seginfos.txt \
+ internals/t-chaining-notes.txt \
internals/threads-syscalls-signals.txt \
internals/tm-mutexstates.dot \
internals/tm-threadstates.dot \
Modified: trunk/coregrind/pub_core_transtab_asm.h (+3 -2)
===================================================================
--- trunk/coregrind/pub_core_transtab_asm.h 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/coregrind/pub_core_transtab_asm.h 2012-04-21 00:58:55 -23:00 (rev 12517)
@@ -42,8 +42,9 @@
ever be used. So instead the function is '(address >>u
2)[VG_TT_FAST_BITS-1 : 0]' on those targets.
- On ARM we do like ppc32/ppc64, although that will have to be
- revisited when we come to implement Thumb.
+ On ARM we shift by 1, since Thumb insns can be of size 2, hence to
+ minimise collisions and maximise cache utilisation we need to take
+ into account all but the least significant bit.
On s390x the rightmost bit of an instruction address is zero.
For best table utilization shift the address to the right by 1 bit. */
Modified: trunk/coregrind/m_scheduler/scheduler.c (+266 -135)
===================================================================
--- trunk/coregrind/m_scheduler/scheduler.c 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/coregrind/m_scheduler/scheduler.c 2012-04-21 00:58:55 -23:00 (rev 12517)
@@ -55,8 +55,23 @@
the OS handles threading and signalling are abstracted away and
implemented elsewhere. [Some of the functions have worked their
way back for the moment, until we do an OS port in earnest...]
- */
+*/
+/* FIXME tchaining tests:
+ - extensive spinrounds
+ - with sched quantum = 1 -- check that handle_noredir_jump
+ doesn't return with INNER_COUNTERZERO
+ other:
+ - out of date comment w.r.t. bit 0 set in libvex_trc_values.h
+ - can VG_TRC_BORING still happen? if not, rm
+ - memory leaks in m_transtab (InEdgeArr/OutEdgeArr leaking?)
+ - move do_cacheflush out of m_transtab
+ - more economical unchaining when nuking an entire sector
+ - ditto w.r.t. cache flushes
+ - verify case of 2 paths from A to B
+ - check -- is IP_AT_SYSCALL still right?
+*/
+
#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_vki.h"
@@ -108,9 +123,6 @@
/* If False, a fault is Valgrind-internal (ie, a bug) */
Bool VG_(in_generated_code) = False;
-/* Counts downwards in VG_(run_innerloop). */
-UInt VG_(dispatch_ctr);
-
/* 64-bit counter for the number of basic blocks done. */
static ULong bbs_done = 0;
@@ -130,6 +142,9 @@
static ULong n_scheduling_events_MINOR = 0;
static ULong n_scheduling_events_MAJOR = 0;
+ULong VG_(stats__n_xindirs) = 0;
+ULong VG_(stats__n_xindir_misses) = 0;
+
/* Sanity checking counts. */
static UInt sanity_fast_count = 0;
static UInt sanity_slow_count = 0;
@@ -137,8 +152,13 @@
void VG_(print_scheduler_stats)(void)
{
VG_(message)(Vg_DebugMsg,
- "scheduler: %'llu jumps (bb entries).\n", bbs_done );
+ "scheduler: %'llu event checks.\n", bbs_done );
VG_(message)(Vg_DebugMsg,
+ "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
+ VG_(stats__n_xindirs), VG_(stats__n_xindir_misses),
+ VG_(stats__n_xindirs) / (VG_(stats__n_xindir_misses)
+ ? VG_(stats__n_xindir_misses) : 1));
+ VG_(message)(Vg_DebugMsg,
"scheduler: %'llu/%'llu major/minor sched events.\n",
n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
VG_(message)(Vg_DebugMsg,
@@ -700,14 +720,34 @@
vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
vg_assert(a_vex + 3 * sz_vex == a_spill);
+# if defined(VGA_x86)
+ /* x86 XMM regs must form an array, ie, have no holes in
+ between. */
+ vg_assert(
+ (offsetof(VexGuestX86State,guest_XMM7)
+ - offsetof(VexGuestX86State,guest_XMM0))
+ == (8/*#regs*/-1) * 16/*bytes per reg*/
+ );
+ vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
+ vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
+ vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
+ vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
+ vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
+# endif
+
# if defined(VGA_amd64)
- /* x86/amd64 XMM regs must form an array, ie, have no
- holes in between. */
+ /* amd64 XMM regs must form an array, ie, have no holes in
+ between. */
vg_assert(
(offsetof(VexGuestAMD64State,guest_XMM16)
- offsetof(VexGuestAMD64State,guest_XMM0))
== (17/*#regs*/-1) * 16/*bytes per reg*/
);
+ vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_XMM0)));
+ vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
+ vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
+ vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
+ vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
# endif
# if defined(VGA_ppc32) || defined(VGA_ppc64)
@@ -724,10 +764,10 @@
# if defined(VGA_arm)
/* arm guest_state VFP regs must be 8 byte aligned for
- loads/stores. */
- vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D0));
- vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
- vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
+ loads/stores. Let's use 16 just to be on the safe side. */
+ vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
+ vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
+ vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
/* be extra paranoid .. */
vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
@@ -755,30 +795,86 @@
}
/* Run the thread tid for a while, and return a VG_TRC_* value
- indicating why VG_(run_innerloop) stopped. */
-static UInt run_thread_for_a_while ( ThreadId tid )
+ indicating why VG_(disp_run_translations) stopped, and possibly an
+ auxiliary word. Also, only allow the thread to run for at most
+ *dispatchCtrP events. If (as is the normal case) use_alt_host_addr
+ is False, we are running ordinary redir'd translations, and we
+ should therefore start by looking up the guest next IP in TT. If
+ it is True then we ignore the guest next IP and just run from
+ alt_host_addr, which presumably points at host code for a no-redir
+ translation.
+
+ Return results are placed in two_words. two_words[0] is set to the
+ TRC. In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
+ the address to patch is placed in two_words[1].
+*/
+static
+void run_thread_for_a_while ( /*OUT*/HWord* two_words,
+ /*MOD*/Int* dispatchCtrP,
+ ThreadId tid,
+ HWord alt_host_addr,
+ Bool use_alt_host_addr )
{
- volatile UWord jumped;
- volatile ThreadState* tst = NULL; /* stop gcc complaining */
- volatile UInt trc;
- volatile Int dispatch_ctr_SAVED;
- volatile Int done_this_time;
+ volatile HWord jumped = 0;
+ volatile ThreadState* tst = NULL; /* stop gcc complaining */
+ volatile Int done_this_time = 0;
+ volatile HWord host_code_addr = 0;
/* Paranoia */
vg_assert(VG_(is_valid_tid)(tid));
vg_assert(VG_(is_running_thread)(tid));
vg_assert(!VG_(is_exiting)(tid));
+ vg_assert(*dispatchCtrP > 0);
tst = VG_(get_ThreadState)(tid);
do_pre_run_checks( (ThreadState*)tst );
/* end Paranoia */
- trc = 0;
- dispatch_ctr_SAVED = VG_(dispatch_ctr);
+ /* Clear return area. */
+ two_words[0] = two_words[1] = 0;
+ /* Figure out where we're starting from. */
+ if (use_alt_host_addr) {
+ /* unusual case -- no-redir translation */
+ host_code_addr = alt_host_addr;
+ } else {
+ /* normal case -- redir translation */
+ UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
+ if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
+ host_code_addr = VG_(tt_fast)[cno].host;
+ else {
+ AddrH res = 0;
+ /* not found in VG_(tt_fast). Searching here the transtab
+ improves the performance compared to returning directly
+ to the scheduler. */
+ Bool found = VG_(search_transtab)(&res, NULL, NULL,
+ (Addr)tst->arch.vex.VG_INSTR_PTR,
+ True/*upd cache*/
+ );
+ if (LIKELY(found)) {
+ host_code_addr = res;
+ } else {
+ /* At this point, we know that we intended to start at a
+ normal redir translation, but it was not found. In
+ which case we can return now claiming it's not
+ findable. */
+ two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
+ return;
+ }
+ }
+ }
+ /* We have either a no-redir or a redir translation. */
+ vg_assert(host_code_addr != 0); /* implausible */
+
+
/* there should be no undealt-with signals */
//vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
+ /* Set up event counter stuff for the run. */
+ tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
+ tst->arch.vex.host_EvC_FAILADDR
+ = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
+
if (0) {
vki_sigset_t m;
Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
@@ -790,6 +886,8 @@
VG_(printf)("\n");
}
+ /* Set up return-value area. */
+
// Tell the tool this thread is about to run client code
VG_TRACK( start_client_code, tid, bbs_done );
@@ -799,26 +897,37 @@
SCHEDSETJMP(
tid,
jumped,
- trc = (UInt)VG_(run_innerloop)( (void*)&tst->arch.vex,
- VG_(clo_profile_flags) > 0 ? 1 : 0 )
+ VG_(disp_run_translations)(
+ two_words,
+ (void*)&tst->arch.vex,
+ host_code_addr
+ )
);
vg_assert(VG_(in_generated_code) == True);
VG_(in_generated_code) = False;
- if (jumped != (UWord)0) {
+ if (jumped != (HWord)0) {
/* We get here if the client took a fault that caused our signal
handler to longjmp. */
- vg_assert(trc == 0);
- trc = VG_TRC_FAULT_SIGNAL;
+ vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
+ two_words[0] = VG_TRC_FAULT_SIGNAL;
+ two_words[1] = 0;
block_signals();
}
- done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 0;
+ vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
+ vg_assert(tst->arch.vex.host_EvC_FAILADDR
+ == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
+ done_this_time = *dispatchCtrP - ((Int)tst->arch.vex.host_EvC_COUNTER + 1);
+
vg_assert(done_this_time >= 0);
bbs_done += (ULong)done_this_time;
+ *dispatchCtrP -= done_this_time;
+ vg_assert(*dispatchCtrP >= 0);
+
// Tell the tool this thread has stopped running client code
VG_TRACK( stop_client_code, tid, bbs_done );
@@ -832,89 +941,16 @@
VG_(gdbserver) (tid);
}
- return trc;
-}
-
-
-/* Run a no-redir translation just once, and return the resulting
- VG_TRC_* value. */
-static UInt run_noredir_translation ( Addr hcode, ThreadId tid )
-{
- volatile UWord jumped;
- volatile ThreadState* tst;
- volatile UWord argblock[4];
- volatile UInt retval;
-
- /* Paranoia */
- vg_assert(VG_(is_valid_tid)(tid));
- vg_assert(VG_(is_running_thread)(tid));
- vg_assert(!VG_(is_exiting)(tid));
-
- tst = VG_(get_ThreadState)(tid);
- do_pre_run_checks( (ThreadState*)tst );
- /* end Paranoia */
-
-# if defined(VGA_ppc32) || defined(VGA_ppc64)
- /* I don't think we need to clear this thread's guest_RESVN here,
- because we can only get here if run_thread_for_a_while() has
- been used immediately before, on this same thread. */
-# endif
-
- /* There can be 3 outcomes from VG_(run_a_noredir_translation):
-
- - a signal occurred and the sighandler longjmp'd. Then both [2]
- and [3] are unchanged - hence zero.
-
- - translation ran normally, set [2] (next guest IP) and set [3]
- to whatever [1] was beforehand, indicating a normal (boring)
- jump to the next block.
-
- - translation ran normally, set [2] (next guest IP) and set [3]
- to something different from [1] beforehand, which indicates a
- TRC_ value.
- */
- argblock[0] = (UWord)hcode;
- argblock[1] = (UWord)&VG_(threads)[tid].arch.vex;
- argblock[2] = 0; /* next guest IP is written here */
- argblock[3] = 0; /* guest state ptr afterwards is written here */
-
- // Tell the tool this thread is about to run client code
- VG_TRACK( start_client_code, tid, bbs_done );
-
- vg_assert(VG_(in_generated_code) == False);
- VG_(in_generated_code) = True;
-
- SCHEDSETJMP(
- tid,
- jumped,
- VG_(run_a_noredir_translation)( &argblock[0] )
- );
-
- VG_(in_generated_code) = False;
-
- if (jumped != (UWord)0) {
- /* We get here if the client took a fault that caused our signal
- handler to longjmp. */
- vg_assert(argblock[2] == 0); /* next guest IP was not written */
- vg_assert(argblock[3] == 0); /* trc was not written */
- block_signals();
- retval = VG_TRC_FAULT_SIGNAL;
+ /* TRC value and possible auxiliary patch-address word are already
+ in two_words[0] and [1] respectively, as a result of the call to
+ VG_(disp_run_translations). */
+ /* Stay sane .. */
+ if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
+ || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
+ vg_assert(two_words[1] != 0); /* we have a legit patch addr */
} else {
- /* store away the guest program counter */
- VG_(set_IP)( tid, argblock[2] );
- if (argblock[3] == argblock[1])
- /* the guest state pointer afterwards was unchanged */
- retval = VG_TRC_BORING;
- else
- retval = (UInt)argblock[3];
+ vg_assert(two_words[1] == 0); /* nobody messed with it */
}
-
- bbs_done++;
-
- // Tell the tool this thread has stopped running client code
- VG_TRACK( stop_client_code, tid, bbs_done );
-
- return retval;
}
@@ -929,13 +965,15 @@
/* Trivial event. Miss in the fast-cache. Do a full
lookup for it. */
- found = VG_(search_transtab)( NULL, ip, True/*upd_fast_cache*/ );
+ found = VG_(search_transtab)( NULL, NULL, NULL,
+ ip, True/*upd_fast_cache*/ );
if (UNLIKELY(!found)) {
/* Not found; we need to request a translation. */
if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
bbs_done, True/*allow redirection*/ )) {
- found = VG_(search_transtab)( NULL, ip, True );
- vg_assert2(found, "VG_TRC_INNER_FASTMISS: missing tt_fast entry");
+ found = VG_(search_transtab)( NULL, NULL, NULL,
+ ip, True );
+ vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
} else {
// If VG_(translate)() fails, it's because it had to throw a
@@ -947,6 +985,43 @@
}
}
+static
+void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
+{
+ Bool found = False;
+ Addr ip = VG_(get_IP)(tid);
+ UInt to_sNo = (UInt)-1;
+ UInt to_tteNo = (UInt)-1;
+
+ found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
+ ip, False/*dont_upd_fast_cache*/ );
+ if (!found) {
+ /* Not found; we need to request a translation. */
+ if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
+ bbs_done, True/*allow redirection*/ )) {
+ found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
+ ip, False );
+ vg_assert2(found, "handle_chain_me: missing tt_fast entry");
+ } else {
+ // If VG_(translate)() fails, it's because it had to throw a
+ // signal because the client jumped to a bad address. That
+ // means that either a signal has been set up for delivery,
+ // or the thread has been marked for termination. Either
+ // way, we just need to go back into the scheduler loop.
+ return;
+ }
+ }
+ vg_assert(found);
+ vg_assert(to_sNo != -1);
+ vg_assert(to_tteNo != -1);
+
+ /* So, finally we know where to patch through to. Do the patching
+ and update the various admin tables that allow it to be undone
+ in the case that the destination block gets deleted. */
+ VG_(tt_tc_do_chaining)( place_to_chain,
+ to_sNo, to_tteNo, toFastEP );
+}
+
static void handle_syscall(ThreadId tid, UInt trc)
{
ThreadState * volatile tst = VG_(get_ThreadState)(tid);
@@ -978,9 +1053,15 @@
/* tid just requested a jump to the noredir version of its current
program counter. So make up that translation if needed, run it,
- and return the resulting thread return code. */
-static UInt/*trc*/ handle_noredir_jump ( ThreadId tid )
+ and return the resulting thread return code in two_words[]. */
+static
+void handle_noredir_jump ( /*OUT*/HWord* two_words,
+ /*MOD*/Int* dispatchCtrP,
+ ThreadId tid )
{
+ /* Clear return area. */
+ two_words[0] = two_words[1] = 0;
+
AddrH hcode = 0;
Addr ip = VG_(get_IP)(tid);
@@ -992,14 +1073,14 @@
found = VG_(search_unredir_transtab)( &hcode, ip );
vg_assert2(found, "unredir translation missing after creation?!");
-
} else {
// If VG_(translate)() fails, it's because it had to throw a
// signal because the client jumped to a bad address. That
// means that either a signal has been set up for delivery,
// or the thread has been marked for termination. Either
// way, we just need to go back into the scheduler loop.
- return VG_TRC_BORING;
+ two_words[0] = VG_TRC_BORING;
+ return;
}
}
@@ -1007,8 +1088,10 @@
vg_assert(found);
vg_assert(hcode != 0);
- /* Otherwise run it and return the resulting VG_TRC_* value. */
- return run_noredir_translation( hcode, tid );
+ /* Otherwise run it and return the resulting VG_TRC_* value. */
+ vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
+ run_thread_for_a_while( two_words, dispatchCtrP, tid,
+ hcode, True/*use hcode*/ );
}
@@ -1020,7 +1103,9 @@
*/
VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
{
- UInt trc;
+ /* Holds the remaining size of this thread's "timeslice". */
+ Int dispatch_ctr = 0;
+
ThreadState *tst = VG_(get_ThreadState)(tid);
static Bool vgdb_startup_action_done = False;
@@ -1079,11 +1164,12 @@
vg_assert(VG_(is_running_thread)(tid));
- VG_(dispatch_ctr) = SCHEDULING_QUANTUM + 1;
+ dispatch_ctr = SCHEDULING_QUANTUM;
while (!VG_(is_exiting)(tid)) {
- if (VG_(dispatch_ctr) == 1) {
+ vg_assert(dispatch_ctr >= 0);
+ if (dispatch_ctr == 0) {
/* Our slice is done, so yield the CPU to another thread. On
Linux, this doesn't sleep between sleeping and running,
@@ -1130,7 +1216,8 @@
exceed zero before entering the innerloop. Also also, the
decrement is done before the bb is actually run, so you
always get at least one decrement even if nothing happens. */
- VG_(dispatch_ctr) = SCHEDULING_QUANTUM + 1;
+ // FIXME is this right?
+ dispatch_ctr = SCHEDULING_QUANTUM;
/* paranoia ... */
vg_assert(tst->tid == tid);
@@ -1142,17 +1229,20 @@
if (0)
VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
- tid, VG_(dispatch_ctr) - 1 );
+ tid, dispatch_ctr - 1 );
- trc = run_thread_for_a_while ( tid );
+ HWord trc[2]; /* "two_words" */
+ run_thread_for_a_while( &trc[0],
+ &dispatch_ctr,
+ tid, 0/*ignored*/, False );
if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
- Char buf[50];
- VG_(sprintf)(buf, "TRC: %s", name_of_sched_event(trc));
+ HChar buf[50];
+ VG_(sprintf)(buf, "TRC: %s", name_of_sched_event(trc[0]));
print_sched_event(tid, buf);
}
- if (trc == VEX_TRC_JMP_NOREDIR) {
+ if (trc[0] == VEX_TRC_JMP_NOREDIR) {
/* If we got a request to run a no-redir version of
something, do so now -- handle_noredir_jump just (creates
and) runs that one translation. The flip side is that the
@@ -1160,20 +1250,61 @@
request -- that would be nonsensical. It can, however,
return VG_TRC_BORING, which just means keep going as
normal. */
- trc = handle_noredir_jump(tid);
- vg_assert(trc != VEX_TRC_JMP_NOREDIR);
+ /* Note that the fact that we need to continue with a
+ no-redir jump is not recorded anywhere else in this
+ thread's state. So we *must* execute the block right now
+ -- we can't fail to execute it and later resume with it,
+ because by then we'll have forgotten the fact that it
+ should be run as no-redir, but will get run as a normal
+ potentially-redir'd, hence screwing up. This really ought
+ to be cleaned up, by noting in the guest state that the
+ next block to be executed should be no-redir. Then we can
+ suspend and resume at any point, which isn't the case at
+ the moment. */
+ handle_noredir_jump( &trc[0],
+ &dispatch_ctr,
+ tid );
+ vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
+
+ /* This can't be allowed to happen, since it means the block
+ didn't execute, and we have no way to resume-as-noredir
+ after we get more timeslice. But I don't think it ever
+ can, since handle_noredir_jump will assert if the counter
+ is zero on entry. */
+ vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
+
+ /* A no-redir translation can't return with a chain-me
+ request, since chaining in the no-redir cache is too
+ complex. */
+ vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
+ && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
}
- switch (trc) {
+ switch (trc[0]) {
+ case VEX_TRC_JMP_BORING:
+ /* assisted dispatch, no event. Used by no-redir
+ translations to force return to the scheduler. */
case VG_TRC_BORING:
/* no special event, just keep going. */
break;
case VG_TRC_INNER_FASTMISS:
- vg_assert(VG_(dispatch_ctr) > 1);
+ vg_assert(dispatch_ctr > 0);
handle_tt_miss(tid);
break;
-
+
+ case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
+ if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
+ handle_chain_me(tid, (void*)trc[1], False);
+ break;
+ }
+
+ case VG_TRC_CHAIN_ME_TO_FAST_EP: {
+ if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
+ handle_chain_me(tid, (void*)trc[1], True);
+ break;
+ }
+
case VEX_TRC_JMP_CLIENTREQ:
do_client_request(tid);
break;
@@ -1182,7 +1313,7 @@
case VEX_TRC_JMP_SYS_INT129: /* x86-darwin */
case VEX_TRC_JMP_SYS_INT130: /* x86-darwin */
case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
- handle_syscall(tid, trc);
+ handle_syscall(tid, trc[0]);
if (VG_(clo_sanity_level) > 2)
VG_(sanity_check_general)(True); /* sanity-check every syscall */
break;
@@ -1195,13 +1326,13 @@
before swapping to another. That means that short term
spins waiting for hardware to poke memory won't cause a
thread swap. */
- if (VG_(dispatch_ctr) > 2000)
- VG_(dispatch_ctr) = 2000;
+ if (dispatch_ctr > 2000)
+ dispatch_ctr = 2000;
break;
case VG_TRC_INNER_COUNTERZERO:
/* Timeslice is out. Let a new thread be scheduled. */
- vg_assert(VG_(dispatch_ctr) == 1);
+ vg_assert(dispatch_ctr == 0);
break;
case VG_TRC_FAULT_SIGNAL:
@@ -1346,7 +1477,7 @@
default:
vg_assert2(0, "VG_(scheduler), phase 3: "
- "unexpected thread return code (%u)", trc);
+ "unexpected thread return code (%u)", trc[0]);
/* NOTREACHED */
break;
Modified: trunk/coregrind/m_dispatch/dispatch-s390x-linux.S (+172 -250)
===================================================================
--- trunk/coregrind/m_dispatch/dispatch-s390x-linux.S 2012-04-20 16:42:12 +01:00 (rev 12516)
+++ trunk/coregrind/m_dispatch/dispatch-s390x-linux.S 2012-04-21 00:58:55 +01:00 (rev 12517)
@@ -9,7 +9,8 @@
framework.
Copyright IBM Corp. 2010-2011
-
+ Copyright 2011-2012, Florian Krohm (br...@ac...)
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
@@ -38,10 +39,15 @@
#if defined(VGA_s390x)
+/*
+#define XINDIR_STATS
+*/
+
/*------------------------------------------------------------*/
/*--- ---*/
-/*--- The dispatch loop. VG_(run_innerloop) is used to ---*/
-/*--- run all translations except no-redir ones. ---*/
+/*--- The dispatch loop. VG_(disp_run_translations) is ---*/
+/*--- used to run all translations, ---*/
+/*--- including no-redir ones. ---*/
/*--- ---*/
/*------------------------------------------------------------*/
@@ -55,107 +61,166 @@
/* Location of valgrind's saved FPC register */
#define S390_LOC_SAVED_FPC_V S390_OFFSET_SAVED_FPC_V(SP)
-/* Location of saved guest state pointer */
-#define S390_LOC_SAVED_GSP S390_OFFSET_SAVED_GSP(SP)
-
/* Location of saved R2 register */
#define S390_LOC_SAVED_R2 S390_OFFSET_SAVED_R2(SP)
+
/*----------------------------------------------------*/
-/*--- Preamble (set everything up) ---*/
+/*--- Entry and preamble (set everything up) ---*/
/*----------------------------------------------------*/
/* signature:
-UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+void VG_(disp_run_translations)( UWord* two_words,
+ void* guest_state,
+ Addr host_addr );
+
+ Return results are placed in two_words:
+
+ two_words[0] is set to the TRC
+ two_words[1] is set to the address to patch (in case two_words[0] is
+ VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP). Otherwise, it is 0.
*/
+ .text
+ .align 4
+ .globl VG_(disp_run_translations)
+ .type VG_(disp_run_translations), @function
+VG_(disp_run_translations):
-.text
-.align 4
-.globl VG_(run_innerloop)
-VG_(run_innerloop):
- /* r2 holds address of guest_state */
- /* r3 holds do_profiling (a flag) */
+ /* r2 holds two_words */
+ /* r3 holds pointer to guest_state */
+ /* r4 holds host_addr, i.e. the address of the translation to run */
/* Save gprs ABI: r6...r13 and r15 */
- stmg %r6,%r15,48(SP)
+ stmg %r6,%r15,48(SP)
/* New stack frame */
- aghi SP,-S390_INNERLOOP_FRAME_SIZE
+ aghi SP,-S390_INNERLOOP_FRAME_SIZE
/* Save fprs: ABI: f8...f15 */
- std %f8,160+0(SP)
- std %f9,160+8(SP)
- std %f10,160+16(SP)
- std %f11,160+24(SP)
- std %f12,160+32(SP)
- std %f13,160+40(SP)
- std %f14,160+48(SP)
- std %f15,160+56(SP)
+ std %f8,160+0(SP)
+ std %f9,160+8(SP)
+ std %f10,160+16(SP)
+ std %f11,160+24(SP)
+ std %f12,160+32(SP)
+ std %f13,160+40(SP)
+ std %f14,160+48(SP)
+ std %f15,160+56(SP)
/* Load address of guest state into guest state register (r13) */
- lgr %r13,%r2
+ lgr %r13,%r3
- /* Store address of guest state pointer on stack.
- It will be needed later because upon return from a VEX translation
- r13 may contain a special value. So the old value will be used to
- determine whether r13 contains a special value. */
- stg %r13,S390_LOC_SAVED_GSP
-
- /* Save valgrind's FPC on stack so run_innerloop_exit can restore
+ /* Save R2 on stack. In postamble it will be restored such that the
+ return values can be written */
+ stg %r2,S390_LOC_SAVED_R2
+
+ /* Save valgrind's FPC on stack so postamble can restore
 it later. */
stfpc S390_LOC_SAVED_FPC_V
/* Load the FPC the way the client code wants it. I.e. pull the
value from the guest state. */
- lfpc OFFSET_s390x_fpc(%r13)
+ lfpc OFFSET_s390x_fpc(%r13)
- /* Get the IA from the guest state */
- lg %r2,OFFSET_s390x_IA(%r13)
+ /* Jump into the code cache. Chained translations in
+ the code cache run until, for whatever reason, they can't
+ continue. When that happens, the translation in question
+ will jump (or call) to one of the continuation points
+ VG_(disp_cp_...) below. */
+ br %r4
- /* Get VG_(dispatch_ctr) -- a 32-bit value -- and store it in a reg */
- larl %r6,VG_(dispatch_ctr)
- l S390_REGNO_DISPATCH_CTR...
[truncated message content] |