|
From: Jeremy F. <je...@go...> - 2005-02-01 22:53:33
|
CVS commit by fitzhardinge:
Make dispatch_ctr a piece of per-thread state. This allows the BB
preamble to use %ebp-relative addressing to decrement it; combined
with the cleanups in remove-baseblock.patch, this halves the size of
the BB preamble from 16 to 8 bytes.
The other reason for doing this is that it makes the semantics of
dispatch_ctr well defined; previously it only made certain that no one
thread could hog the VCPU; now it defines how many BBs a thread can
run before it is preempted.
M +3 -6 core.h 1.76
M +7 -6 vg_from_ucode.c 1.89
M +91 -127 vg_scheduler.c 1.218
M +1 -1 x86/dispatch.S 1.5
M +3 -0 x86/gen_offsets.c 1.2
--- valgrind/coregrind/core.h #1.75:1.76
@@ -643,4 +643,7 @@ struct _ThreadState {
VgSchedReturnCode exitreason;
+ /* Basic blocks remaining before timeslice up */
+ UInt dispatch_ctr;
+
/* Architecture-specific thread state
Leave early in the structure to make offsets small
@@ -765,10 +768,4 @@ extern void VG_(set_sleeping) ( ThreadId
extern void VG_(vg_yield)(void);
-/* Give a hint to the scheduler that it may be a good time to find a
- new runnable thread. If prefer_sched != VG_INVALID_THREADID, then
- try to schedule that thread.
-*/
-extern void VG_(need_resched) ( ThreadId prefer_sched );
-
// The scheduler.
extern VgSchedReturnCode VG_(scheduler) ( ThreadId tid );
--- valgrind/coregrind/vg_from_ucode.c #1.88:1.89
@@ -4428,15 +4428,16 @@ UChar* VG_(emit_code) ( UCodeBlock* cb,
if (dis) VG_(printf)("Generated x86 code:\n");
- /* Generate subl $1, VG_(dispatch_ctr) and drop into dispatch if we hit
- zero. We have to do this regardless of whether we're t-chaining
- or not. (The ia32 optimisation guide recommends sub over dec.) */
+ /* Generate subl $1, dispatch_ctr(%ebp) and drop into dispatch if
+ we hit zero. We have to do this regardless of whether we're
+ t-chaining or not. (The ia32 optimisation guide recommends sub
+ over dec.) */
VG_(init_target)(&tgt);
VG_(new_emit)(False, FlagsEmpty, FlagsOSZAP);
- VG_(emitB) (0x83); /* subl */
- emit_amode_litmem_reg((Addr)&VG_(dispatch_ctr), 5);
+ VG_(emitB) (0x83); /* grp1 */
+ VG_(emit_amode_offregmem_reg)(offsetof(ThreadState, dispatch_ctr), R_EBP, mkGrp1opcode(SUB));
VG_(emitB) (0x01);
if (dis)
- VG_(printf)("\n\t\tsubl $1, (%p)\n", &VG_(dispatch_ctr));
+ VG_(printf)("\n\t\tsubl $1, %d(%%ebp)\n", offsetof(ThreadState, dispatch_ctr));
/* Don't bother setting EBP; the dispatch loop can work it out for
--- valgrind/coregrind/vg_scheduler.c #1.217:1.218
@@ -496,4 +496,6 @@ void mostly_clear_thread_record ( Thread
VG_(threads)[tid].status = VgTs_Zombie;
+ VG_(threads)[tid].dispatch_ctr = 0;
+
VG_(threads)[tid].syscallno = -1;
@@ -641,6 +643,4 @@ VgSchedReturnCode VG_(scheduler) ( Threa
{
UInt trc;
- UInt dispatch_ctr_SAVED;
- Int done_this_time;
ThreadState *tst = VG_(get_ThreadState)(tid);
@@ -653,4 +653,16 @@ VgSchedReturnCode VG_(scheduler) ( Threa
while(!VG_(is_exiting)(tid)) {
+ UInt remaining_bbs;
+
+ if (tst->dispatch_ctr == 0) {
+ /* Our slice is done, so yield the CPU to another thread. This
+ doesn't sleep between sleeping and running, since that would
+ take too much time. */
+ VG_(set_sleeping)(tid, VgTs_Yielding);
+ /* nothing */
+ VG_(set_running)(tid);
+ VG_TRACK( thread_run, tid );
+
+ /* OK, do some relatively expensive housekeeping stuff */
scheduler_sanity(tid);
VG_(sanity_check_general)(False);
@@ -663,24 +675,18 @@ VgSchedReturnCode VG_(scheduler) ( Threa
n_scheduling_events_MAJOR++;
- VG_TRACK( thread_run, tid );
-
/* Figure out how many bbs to ask vg_run_innerloop to do. Note
that it decrements the counter before testing it for zero, so
- that if VG_(dispatch_ctr) is set to N you get at most N-1
- iterations. Also this means that VG_(dispatch_ctr) must
+ that if tst->dispatch_ctr is set to N you get at most N-1
+ iterations. Also this means that tst->dispatch_ctr must
exceed zero before entering the innerloop. Also also, the
decrement is done before the bb is actually run, so you
always get at least one decrement even if nothing happens. */
- VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1;
-
- /* ... and remember what we asked for. */
- dispatch_ctr_SAVED = VG_(dispatch_ctr);
+ tst->dispatch_ctr = VG_SCHEDULING_QUANTUM + 1;
/* paranoia ... */
vg_assert(tst->tid == tid);
vg_assert(tst->os_state.lwpid == VG_(gettid)());
+ }
- /* Actually run the thread for a timeslice. */
- while(!VG_(is_exiting)(tid) && VG_(dispatch_ctr) > 0) {
/* For stats purposes only. */
n_scheduling_events_MINOR++;
@@ -688,8 +694,12 @@ VgSchedReturnCode VG_(scheduler) ( Threa
if (0)
VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs",
- tid, VG_(dispatch_ctr) - 1 );
+ tid, tst->dispatch_ctr - 1 );
+
+ remaining_bbs = tst->dispatch_ctr;
trc = run_thread_for_a_while ( tid );
+ VG_(bbs_done) += remaining_bbs - tst->dispatch_ctr;
+
if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
Char buf[50];
@@ -698,10 +708,7 @@ VgSchedReturnCode VG_(scheduler) ( Threa
}
- done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr);
- VG_(bbs_done) += (ULong)done_this_time;
-
switch(trc) {
case VG_TRC_INNER_FASTMISS:
- vg_assert(VG_(dispatch_ctr) > 0);
+ vg_assert(tst->dispatch_ctr > 0);
handle_tt_miss(tid);
break;
@@ -718,10 +725,10 @@ VgSchedReturnCode VG_(scheduler) ( Threa
/* Explicit yield, because this thread is in a spin-lock
or something. Let another thread run ASAP. */
- VG_(dispatch_ctr) = 0;
+ tst->dispatch_ctr = 0;
break;
case VG_TRC_INNER_COUNTERZERO:
/* Timeslice is out. Let a new thread be scheduled. */
- vg_assert(VG_(dispatch_ctr) == 0);
+ vg_assert(tst->dispatch_ctr == 0);
break;
@@ -744,12 +751,4 @@ VgSchedReturnCode VG_(scheduler) ( Threa
}
- /* Our slice is done, so yield the CPU to another thread. This
- doesn't sleep between sleeping and running, since that would
- take too much time. */
- VG_(set_sleeping)(tid, VgTs_Yielding);
- /* nothing */
- VG_(set_running)(tid);
- }
-
vg_assert(VG_(is_exiting)(tid));
@@ -759,39 +758,4 @@ VgSchedReturnCode VG_(scheduler) ( Threa
}
-void VG_(need_resched) ( ThreadId prefer )
-{
- /* Tell the scheduler now might be a good time to find a new
- runnable thread, because something happened which woke a thread
- up.
-
- NB: This can be called unsynchronized from either a signal
- handler, or from another LWP (ie, real kernel thread).
-
- In principle this could simply be a matter of setting
- VG_(dispatch_ctr) to a small value (say, 2), which would make
- any running code come back to the scheduler fairly quickly.
-
- However, since the scheduler implements a strict round-robin
- policy with only one priority level, there are, by definition,
- no better threads to be running than the current thread anyway,
- so we may as well ignore this hint. For processes with a
- mixture of compute and I/O bound threads, this means the compute
- threads could introduce longish latencies before the I/O threads
- run. For programs with only I/O bound threads, need_resched
- won't have any effect anyway.
-
- OK, so I've added command-line switches to enable low-latency
- syscalls and signals. The prefer_sched variable is in effect
- the ID of a single thread which has higher priority than all the
- others. If set, the scheduler will prefer to schedule that
- thread over all others. Naturally, this could lead to
- starvation or other unfairness.
- */
-
- if (VG_(dispatch_ctr) > 10)
- VG_(dispatch_ctr) = 2;
-
- (void)prefer; /* kernel's doing the selection, so we can't use this */
-}
/*
--- valgrind/coregrind/x86/gen_offsets.c #1.1:1.2
@@ -13,4 +13,7 @@ int main(int argc, char **argv)
printf("#define VG_CODE_OFFSET %d\n", offsetof(TCEntry, payload));
+ printf("\n/* offset of dispatch_ctr in ThreadState */\n");
+ printf("#define VGOFF_DISPATCH_CTR %d\n", offsetof(ThreadState, dispatch_ctr));
+
/* register offsets into struct ThreadState */
printf("\n/* register offsets in ThreadState */\n");
--- valgrind/coregrind/x86/dispatch.S #1.4:1.5
@@ -126,5 +126,5 @@
/* Are we out of timeslice? If yes, defer to scheduler. */
- cmpl $0, VG_(dispatch_ctr)
+ cmpl $0, VGOFF_DISPATCH_CTR(%ebx)
jz counter_is_zero
|