|
From: <sv...@va...> - 2005-12-15 19:54:01
|
Author: njn
Date: 2005-12-15 19:53:50 +0000 (Thu, 15 Dec 2005)
New Revision: 5350
Log:
Merge in r5345 and r5346 from trunk -- dispatcher improvements that speed
things up a lot.
Modified:
branches/COMPVBITS/coregrind/m_dispatch/dispatch-amd64-linux.S
branches/COMPVBITS/coregrind/m_dispatch/dispatch-x86-linux.S
branches/COMPVBITS/coregrind/m_scheduler/scheduler.c
branches/COMPVBITS/coregrind/m_translate.c
branches/COMPVBITS/coregrind/pub_core_dispatch.h
Modified: branches/COMPVBITS/coregrind/m_dispatch/dispatch-amd64-linux.S
===================================================================
--- branches/COMPVBITS/coregrind/m_dispatch/dispatch-amd64-linux.S 2005-12-15 19:41:14 UTC (rev 5349)
+++ branches/COMPVBITS/coregrind/m_dispatch/dispatch-amd64-linux.S 2005-12-15 19:53:50 UTC (rev 5350)
@@ -1,8 +1,8 @@
=20
-##--------------------------------------------------------------------##
-##--- The core dispatch loop, for jumping to a code address. ---##
-##--- dispatch-amd64.S ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address. ---*/
+/*--- dispatch-amd64.S ---*/
+/*--------------------------------------------------------------------*/
=20
/*
This file is part of Valgrind, a dynamic binary instrumentation
@@ -39,11 +39,19 @@
/*--- The dispatch loop. ---*/
/*------------------------------------------------------------*/
=20
-/* signature: UWord VG_(run_innerloop) ( void* guest_state ) */
+/*----------------------------------------------------*/
+/*--- Preamble (set everything up) ---*/
+/*----------------------------------------------------*/
=20
+/* signature:
+UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+*/
+
+.text
.globl VG_(run_innerloop)
VG_(run_innerloop):
/* %rdi holds guest_state */
+ /* %rsi holds do_profiling */
=09
/* ----- entry point to VG_(run_innerloop) ----- */
pushq %rbx
@@ -59,12 +67,13 @@
pushq %r13
pushq %r14
pushq %r15
- pushq %rdi
+ pushq %rdi /* guest_state */
=20
- movq VG_(dispatch_ctr)@GOTPCREL(%rip), %rsi
- pushq (%rsi)
+ movq VG_(dispatch_ctr)@GOTPCREL(%rip), %r15
+ movl (%r15), %r15d
+ pushq %r15
=20
- /* 8(%rsp) holds cached copy of guest_state */
+ /* 8(%rsp) holds cached copy of guest_state ptr */
/* 0(%rsp) holds cached copy of VG_(dispatch_ctr) */
=20
/* Set up the guest state pointer */
@@ -90,12 +99,26 @@
/* set dir flag to known value */
cld
=20
- /* fall into main loop */
+ /* fall into main loop (the right one) */
+ cmpq $0, %rsi
+ je VG_(run_innerloop__dispatch_unprofiled)
+ jmp VG_(run_innerloop__dispatch_profiled)
+ /*NOTREACHED*/=09
=20
- /* Here, %rax is the only live (real) register. The entire
- simulated state is saved in the ThreadState. */
+/*----------------------------------------------------*/
+/*--- NO-PROFILING (standard) dispatcher ---*/
+/*----------------------------------------------------*/
=20
-dispatch_boring:
+.align 16
+.global VG_(run_innerloop__dispatch_unprofiled)
+VG_(run_innerloop__dispatch_unprofiled):
+ /* AT ENTRY: %rax is next guest addr, %rbp is possibly
+ modified guest state ptr */
+
+ /* Has the guest state pointer been messed with? If yes, exit. */
+ cmpq 8(%rsp), %rbp
+ jnz gsp_changed
+
/* save the jump address in the guest state */
movq %rax, OFFSET_amd64_RIP(%rbp)
=20
@@ -104,40 +127,99 @@
jz counter_is_zero
=20
/* try a fast lookup in the translation cache */
- movq %rax, %rbx
- andq $VG_TT_FAST_MASK, %rbx
- movq VG_(tt_fast)@GOTPCREL(%rip), %rcx
- movq (%rcx,%rbx,8), %rcx
- cmpq %rax, (%rcx)
- jnz fast_lookup_failed
- /* increment bb profile counter */
- movq VG_(tt_fastN)@GOTPCREL(%rip), %rdx
- movq (%rdx,%rbx,8), %rdx
- incl (%rdx)
+ movq VG_(tt_fast)@GOTPCREL(%rip), %rcx
+ movq %rax, %rbx
+ andq $VG_TT_FAST_MASK, %rbx
+ movq (%rcx,%rbx,8), %rcx
+ cmpq %rax, (%rcx)
+ jnz fast_lookup_failed
=20
/* Found a match. Call tce[1], which is 8 bytes along, since
each tce element is a 64-bit int. */
addq $8, %rcx
- call *%rcx
+ jmp *%rcx
+ ud2 /* persuade insn decoders not to speculate past here */
+ /* generated code should run, then jump back to
+ VG_(run_innerloop__dispatch_unprofiled). */
+ /*NOTREACHED*/
=20
- /*=20
- %rax holds destination (original) address.
- %rbp indicates further details of the control transfer
- requested to the address in %rax.
-=09
- If rbp is unchanged (== * 8(%rsp)), just jump next to %rax.
+/*----------------------------------------------------*/
+/*--- PROFILING dispatcher (can be much slower) ---*/
+/*----------------------------------------------------*/
=20
- Otherwise fall out, back to the scheduler, and let it
- figure out what to do next.
- */
+.align 16
+.global VG_(run_innerloop__dispatch_profiled)
+VG_(run_innerloop__dispatch_profiled):
+ /* AT ENTRY: %rax is next guest addr, %rbp is possibly
+ modified guest state ptr */
=20
+ /* Has the guest state pointer been messed with? If yes, exit. */
cmpq 8(%rsp), %rbp
- jz dispatch_boring
+ jnz gsp_changed
=20
- jmp dispatch_exceptional
+ /* save the jump address in the guest state */
+ movq %rax, OFFSET_amd64_RIP(%rbp)
=20
+ /* Are we out of timeslice? If yes, defer to scheduler. */
+ subl $1, 0(%rsp)
+ jz counter_is_zero
=20
+ /* try a fast lookup in the translation cache */
+ movq VG_(tt_fast)@GOTPCREL(%rip), %rcx
+ movq %rax, %rbx
+ andq $VG_TT_FAST_MASK, %rbx
+ movq (%rcx,%rbx,8), %rcx
+ cmpq %rax, (%rcx)
+ jnz fast_lookup_failed
=20
+ /* increment bb profile counter */
+ movq VG_(tt_fastN)@GOTPCREL(%rip), %rdx
+ movq (%rdx,%rbx,8), %rdx
+ addl $1, (%rdx)
+
+ /* Found a match. Call tce[1], which is 8 bytes along, since
+ each tce element is a 64-bit int. */
+ addq $8, %rcx
+ jmp *%rcx
+ ud2 /* persuade insn decoders not to speculate past here */
+ /* generated code should run, then jump back to
+ VG_(run_innerloop__dispatch_profiled). */
+ /*NOTREACHED*/
+
+/*----------------------------------------------------*/
+/*--- exit points ---*/
+/*----------------------------------------------------*/
+
+gsp_changed:
+ /* Someone messed with the gsp. Have to
+ defer to scheduler to resolve this. dispatch ctr
+ is not yet decremented, so no need to increment. */
+ /* %RIP is NOT up to date here. First, need to write
+ %rax back to %RIP, but without trashing %rbp since
+ that holds the value we want to return to the scheduler.
+ Hence use %r15 transiently for the guest state pointer. */
+ movq 8(%rsp), %r15
+ movq %rax, OFFSET_amd64_RIP(%r15)
+ movq %rbp, %rax
+ jmp run_innerloop_exit
+ /*NOTREACHED*/
+
+counter_is_zero:
+ /* %RIP is up to date here */
+ /* back out decrement of the dispatch counter */
+ addl $1, 0(%rsp)
+ movq $VG_TRC_INNER_COUNTERZERO, %rax
+ jmp run_innerloop_exit
+
+fast_lookup_failed:
+ /* %RIP is up to date here */
+ /* back out decrement of the dispatch counter */
+ addl $1, 0(%rsp)
+ movq $VG_TRC_INNER_FASTMISS, %rax
+ jmp run_innerloop_exit
+
+
+
/* All exits from the dispatcher go through here. %rax holds
the return value.=20
*/
@@ -150,14 +232,14 @@
pushq $0
fstcw (%rsp)
cmpl $0x027F, (%rsp)
- popq %r11 /* get rid of the word without trashing %eflags */
+ popq %r15 /* get rid of the word without trashing %eflags */
jnz invariant_violation
#endif
pushq $0
stmxcsr (%rsp)
andl $0xFFFFFFC0, (%rsp) /* mask out status flags */
cmpl $0x1F80, (%rsp)
- popq %r11
+ popq %r15
jnz invariant_violation
/* otherwise we're OK */
jmp run_innerloop_exit_REALLY
@@ -167,8 +249,12 @@
jmp run_innerloop_exit_REALLY
=20
run_innerloop_exit_REALLY:
- movq VG_(dispatch_ctr)@GOTPCREL(%rip), %rsi
- popq (%rsi)
+
+ /* restore VG_(dispatch_ctr) */=09
+ popq %r14
+ movq VG_(dispatch_ctr)@GOTPCREL(%rip), %r15
+ movl %r14d, (%r15)
+
popq %rdi
popq %r15
popq %r14
@@ -190,31 +276,13 @@
/* Other ways of getting out of the inner loop. Placed out-of-line to
make it look cleaner.=20
*/
-dispatch_exceptional:
- /* this is jumped to only, not fallen-through from above */
=20
- /* save %rax in %RIP and defer to sched */
- movq 8(%rsp), %rdi
- movq %rax, OFFSET_amd64_RIP(%rdi)
- movq %rbp, %rax
- jmp run_innerloop_exit
=20
-fast_lookup_failed:
- /* %RIP is up to date here since dispatch_boring dominates */
- addl $1, 0(%rsp)
- movq $VG_TRC_INNER_FASTMISS, %rax
- jmp run_innerloop_exit
=20
-counter_is_zero:
- /* %RIP is up to date here since dispatch_boring dominates */
- addl $1, 0(%rsp)
- movq $VG_TRC_INNER_COUNTERZERO, %rax
- jmp run_innerloop_exit
=20
-
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
=20
-##--------------------------------------------------------------------##
-##--- end ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/
Modified: branches/COMPVBITS/coregrind/m_dispatch/dispatch-x86-linux.S
===================================================================
--- branches/COMPVBITS/coregrind/m_dispatch/dispatch-x86-linux.S 2005-12-15 19:41:14 UTC (rev 5349)
+++ branches/COMPVBITS/coregrind/m_dispatch/dispatch-x86-linux.S 2005-12-15 19:53:50 UTC (rev 5350)
@@ -1,8 +1,8 @@
=20
-##--------------------------------------------------------------------##
-##--- The core dispatch loop, for jumping to a code address. ---##
-##--- dispatch-x86.S ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address. ---*/
+/*--- dispatch-x86.S ---*/
+/*--------------------------------------------------------------------*/
=20
/*
This file is part of Valgrind, a dynamic binary instrumentation
@@ -39,11 +39,18 @@
/*--- The dispatch loop. ---*/
/*------------------------------------------------------------*/
=20
-/* signature: UWord VG_(run_innerloop) ( void* guest_state ) */
+/*----------------------------------------------------*/
+/*--- Preamble (set everything up) ---*/
+/*----------------------------------------------------*/
=20
+/* signature:
+UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+*/
+.text
.globl VG_(run_innerloop)
VG_(run_innerloop):
/* 4(%esp) holds guest_state */
+ /* 8(%esp) holds do_profiling */
=09
/* ----- entry point to VG_(run_innerloop) ----- */
pushl %ebx
@@ -54,6 +61,7 @@
pushl %ebp
=09
/* 28(%esp) holds guest_state */
+ /* 32(%esp) holds do_profiling */
=20
/* Set up the guest state pointer */
movl 28(%esp), %ebp
@@ -80,52 +88,128 @@
/* set dir flag to known value */
cld
=09
- /* fall into main loop */
+ /* fall into main loop (the right one) */
+ cmpl $0, 32(%esp) /* do_profiling */
+ je VG_(run_innerloop__dispatch_unprofiled)
+ jmp VG_(run_innerloop__dispatch_profiled)
+ /*NOTREACHED*/
=20
- /* Here, %eax is the only live (real) register. The entire
- simulated state is saved in the ThreadState. */
+/*----------------------------------------------------*/
+/*--- NO-PROFILING (standard) dispatcher ---*/
+/*----------------------------------------------------*/
=20
-dispatch_boring:
+.align 16
+.global VG_(run_innerloop__dispatch_unprofiled)
+VG_(run_innerloop__dispatch_unprofiled):
+ /* AT ENTRY: %eax is next guest addr, %ebp is possibly
+ modified guest state ptr */
+
+ /* Has the guest state pointer been messed with? If yes, exit. */
+ cmpl 28(%esp), %ebp
+ jnz gsp_changed
+
/* save the jump address in the guest state */
movl %eax, OFFSET_x86_EIP(%ebp)
=20
/* Are we out of timeslice? If yes, defer to scheduler. */
- subl $1, VG_(dispatch_ctr)
+ subl $1, VG_(dispatch_ctr)
jz counter_is_zero
=20
/* try a fast lookup in the translation cache */
- movl %eax, %ebx
- andl $VG_TT_FAST_MASK, %ebx
- movl VG_(tt_fast)(,%ebx,4), %ecx
- cmpl %eax, (%ecx)
- jnz fast_lookup_failed
- /* increment bb profile counter */
- movl VG_(tt_fastN)(,%ebx,4), %edx
- incl (%edx)
+ movl %eax, %ebx
+ andl $VG_TT_FAST_MASK, %ebx
+ movl VG_(tt_fast)(,%ebx,4), %ecx
+ cmpl %eax, (%ecx)
+ jnz fast_lookup_failed
=20
- /* Found a match. Call tce[1], which is 8 bytes along, since
- each tce element is a 64-bit int. */
+ /* Found a match. Jump to tce[1], which is 8 bytes along,
+ since each tce element is a 64-bit int. */
addl $8, %ecx
- call *%ecx
-=09
- /*=20
- %eax holds destination (original) address.
- %ebp indicates further details of the control transfer
- requested to the address in %eax.
-=09
- If ebp is unchanged (== * 28(%esp)), just jump next to %eax.
+ jmp *%ecx
+ ud2 /* persuade insn decoders not to speculate past here */
+ /* generated code should run, then jump back to
+ VG_(run_innerloop__dispatch_unprofiled). */
+ /*NOTREACHED*/
=20
- Otherwise fall out, back to the scheduler, and let it
- figure out what to do next.
- */
+/*----------------------------------------------------*/
+/*--- PROFILING dispatcher (can be much slower) ---*/
+/*----------------------------------------------------*/
=20
+.align 16
+.global VG_(run_innerloop__dispatch_profiled)
+VG_(run_innerloop__dispatch_profiled):
+ /* AT ENTRY: %eax is next guest addr, %ebp is possibly
+ modified guest state ptr */
+
+ /* Has the guest state pointer been messed with? If yes, exit. */
cmpl 28(%esp), %ebp
- jz dispatch_boring
+ jnz gsp_changed
=20
- jmp dispatch_exceptional
+ /* save the jump address in the guest state */
+ movl %eax, OFFSET_x86_EIP(%ebp)
=20
-=09
+ /* Are we out of timeslice? If yes, defer to scheduler. */
+ subl $1, VG_(dispatch_ctr)
+ jz counter_is_zero
=20
+ /* try a fast lookup in the translation cache */
+ movl %eax, %ebx
+ andl $VG_TT_FAST_MASK, %ebx
+ movl VG_(tt_fast)(,%ebx,4), %ecx
+ cmpl %eax, (%ecx)
+ jnz fast_lookup_failed
+ /* increment bb profile counter */
+ /* note: innocuous as this sounds, it causes a huge amount more
+ stress on D1 and significantly slows everything down. */
+ movl VG_(tt_fastN)(,%ebx,4), %edx
+ /* Use "addl $1", not "incl", to avoid partial-flags stall on P4 */
+ addl $1, (%edx)
+
+ /* Found a match. Jump to tce[1], which is 8 bytes along,
+ since each tce element is a 64-bit int. */
+ addl $8, %ecx
+ jmp *%ecx
+ ud2 /* persuade insn decoders not to speculate past here */
+ /* generated code should run, then jump back to
+ VG_(run_innerloop__dispatch_profiled). */
+ /*NOTREACHED*/
+
+/*----------------------------------------------------*/
+/*--- exit points ---*/
+/*----------------------------------------------------*/
+
+gsp_changed:
+ /* Someone messed with the gsp. Have to
+ defer to scheduler to resolve this. dispatch ctr
+ is not yet decremented, so no need to increment. */
+ /* %EIP is NOT up to date here. First, need to write
+ %eax back to %EIP, but without trashing %ebp since
+ that holds the value we want to return to the scheduler.
+ Hence use %esi transiently for the guest state pointer. */
+ movl 28(%esp), %esi
+ movl %eax, OFFSET_x86_EIP(%esi)
+ movl %ebp, %eax
+ jmp run_innerloop_exit
+ /*NOTREACHED*/
+
+counter_is_zero:
+ /* %EIP is up to date here */
+ /* back out decrement of the dispatch counter */
+ addl $1, VG_(dispatch_ctr)
+ movl $VG_TRC_INNER_COUNTERZERO, %eax
+ jmp run_innerloop_exit
+ /*NOTREACHED*/
+
+fast_lookup_failed:
+ /* %EIP is up to date here */
+ /* back out decrement of the dispatch counter */
+ addl $1, VG_(dispatch_ctr)
+ movl $VG_TRC_INNER_FASTMISS, %eax
+ jmp run_innerloop_exit
+ /*NOTREACHED*/
+
+
+
/* All exits from the dispatcher go through here. %eax holds
the return value.=20
*/
@@ -165,36 +249,10 @@
popl %ebx
ret=09
=20
-
-
-/* Other ways of getting out of the inner loop. Placed out-of-line to
- make it look cleaner.=20
-*/
-dispatch_exceptional:
- /* this is jumped to only, not fallen-through from above */
-
- /* save %eax in %EIP and defer to sched */
- movl 28(%esp), %edi
- movl %eax, OFFSET_x86_EIP(%edi)
- movl %ebp, %eax
- jmp run_innerloop_exit
-
-fast_lookup_failed:
- /* %EIP is up to date here since dispatch_boring dominates */
- addl $1, VG_(dispatch_ctr)
- movl $VG_TRC_INNER_FASTMISS, %eax
- jmp run_innerloop_exit
-
-counter_is_zero:
- /* %EIP is up to date here since dispatch_boring dominates */
- addl $1, VG_(dispatch_ctr)
- movl $VG_TRC_INNER_COUNTERZERO, %eax
- jmp run_innerloop_exit
-
=09
/* Let the linker know we don't need an executable stack */
.section .note.GNU-stack,"",@progbits
=20
-##--------------------------------------------------------------------##
-##--- end ---##
-##--------------------------------------------------------------------##
+/*--------------------------------------------------------------------*/
+/*--- end ---*/
+/*--------------------------------------------------------------------*/
Modified: branches/COMPVBITS/coregrind/m_scheduler/scheduler.c
===================================================================
--- branches/COMPVBITS/coregrind/m_scheduler/scheduler.c 2005-12-15 19:41:14 UTC (rev 5349)
+++ branches/COMPVBITS/coregrind/m_scheduler/scheduler.c 2005-12-15 19:53:50 UTC (rev 5350)
@@ -428,8 +428,12 @@
vg_assert(VG_(my_fault));
VG_(my_fault) = False;
 
- SCHEDSETJMP(tid, jumped, 
- trc = (UInt)VG_(run_innerloop)( (void*)&tst->arch.vex ));
+ SCHEDSETJMP(
+ tid, 
+ jumped, 
+ trc = (UInt)VG_(run_innerloop)( (void*)&tst->arch.vex,
+ VG_(clo_profile_flags) > 0 ? 1 : 0)
+ );
 
//nextEIP = tst->arch.m_eip;
//if (nextEIP >= VG_(client_end))
Modified: branches/COMPVBITS/coregrind/m_translate.c
===================================================================
--- branches/COMPVBITS/coregrind/m_translate.c 2005-12-15 19:41:14 UTC (rev 5349)
+++ branches/COMPVBITS/coregrind/m_translate.c 2005-12-15 19:53:50 UTC (rev 5350)
@@ -32,23 +32,25 @@
#include "pub_core_basics.h"
#include "pub_core_aspacemgr.h"
=20
-#include "pub_core_machine.h" // For VG_(machine_get_VexArchInfo)
- // and VG_(get_SP)
+#include "pub_core_machine.h" // For VG_(machine_get_VexArchInfo)
+ // and VG_(get_SP)
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_profile.h"
=20
-#include "pub_core_debuginfo.h" // Needed for pub_core_redir :(
-#include "pub_core_redir.h" // For VG_(code_redirect)()
+#include "pub_core_debuginfo.h" // Needed for pub_core_redir :(
+#include "pub_core_redir.h" // For VG_(code_redirect)()
=20
-#include "pub_core_signals.h" // For VG_(synth_fault_{perms,mapping})()
-#include "pub_core_stacks.h" // For VG_(unknown_SP_update)()
-#include "pub_core_tooliface.h" // For VG_(tdict)
+#include "pub_core_signals.h" // For VG_(synth_fault_{perms,mapping})()
+#include "pub_core_stacks.h" // For VG_(unknown_SP_update)()
+#include "pub_core_tooliface.h" // For VG_(tdict)
#include "pub_core_translate.h"
#include "pub_core_transtab.h"
+#include "pub_core_dispatch.h" // VG_(run_innerloop__dispatch_{un}profiled)
=20
+
/*------------------------------------------------------------*/
/*--- Stats ---*/
/*------------------------------------------------------------*/
@@ -569,6 +571,7 @@
VexArch vex_arch;
VexArchInfo vex_archinfo;
VexGuestExtents vge;
+ VexTranslateArgs vta;
VexTranslateResult tres;
=20
/* Make sure Vex is initialised right. */
@@ -690,25 +693,41 @@
/* Set up closure arg for "chase_into_ok" */
chase_into_ok__CLOSURE_tid = tid;
 
- tres = LibVEX_Translate ( 
- vex_arch, &vex_archinfo,
- vex_arch, &vex_archinfo,
- (UChar*)ULong_to_Ptr(orig_addr),=20
- (Addr64)orig_addr,=20
- (Addr64)orig_addr_noredir,=20
- chase_into_ok,
- &vge,
- tmpbuf, N_TMPBUF, &tmpbuf_used,
- VG_(tdict).tool_instrument,
- need_to_handle_SP_assignment()
- ? vg_SP_update_pass
- : NULL,
- True, /* cleanup after instrumentation */
- do_self_check,
- NULL,
- verbosity
- );
+ vta.arch_guest = vex_arch;
+ vta.archinfo_guest = vex_archinfo;
+ vta.arch_host = vex_arch;
+ vta.archinfo_host = vex_archinfo;
+ vta.guest_bytes = (UChar*)ULong_to_Ptr(orig_addr);
+ vta.guest_bytes_addr = (Addr64)orig_addr;
+ vta.guest_bytes_addr_noredir = (Addr64)orig_addr_noredir;
+ vta.chase_into_ok = chase_into_ok;
+ vta.guest_extents = &vge;
+ vta.host_bytes = tmpbuf;
+ vta.host_bytes_size = N_TMPBUF;
+ vta.host_bytes_used = &tmpbuf_used;
+ vta.instrument1 = VG_(tdict).tool_instrument;
+ vta.instrument2 = need_to_handle_SP_assignment()
+ ? vg_SP_update_pass
+ : NULL;
+ vta.do_self_check = do_self_check;
+ vta.traceflags = verbosity;
=20
+ /* Set up the dispatch-return info. For archs without a link
+ register, vex generates a jump back to the specified dispatch
+ address. Else, it just generates a branch-to-LR. */
+# if defined(VGA_x86) || defined(VGA_amd64)
+ vta.dispatch = VG_(clo_profile_flags) > 0
+ ? (void*) &VG_(run_innerloop__dispatch_profiled)
+ : (void*) &VG_(run_innerloop__dispatch_unprofiled);
+# elif defined(VGA_ppc32) || defined(VGA_ppc64)
+ vta.dispatch = NULL;
+# else
+# error "Unknown arch"
+# endif
+
+ /* Sheesh. Finally, actually _do_ the translation! */
+ tres = LibVEX_Translate ( &vta );
+
vg_assert(tres == VexTransOK);
vg_assert(tmpbuf_used <= N_TMPBUF);
vg_assert(tmpbuf_used > 0);
Modified: branches/COMPVBITS/coregrind/pub_core_dispatch.h
===================================================================
--- branches/COMPVBITS/coregrind/pub_core_dispatch.h 2005-12-15 19:41:14 UTC (rev 5349)
+++ branches/COMPVBITS/coregrind/pub_core_dispatch.h 2005-12-15 19:53:50 UTC (rev 5350)
@@ -50,12 +50,24 @@
signal, for example SIGSEGV, in which case control longjmp()s back past
here.
=20
+ If do_profiling is nonzero, the profile counters arrays should be
+ updated for each translation run.
+
This code simply handles the common case fast -- when the translation
address is found in the translation cache. For anything else, the
scheduler does the work.
*/
-extern UWord VG_(run_innerloop) ( void* guest_state );
+extern=20
+UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
=20
+#if defined(VGA_x86) || defined(VGA_amd64)
+/* We need to locate a couple of labels inside VG_(run_innerloop), so
+ that Vex can add branches to them from generated code. Hence the
+ following somewhat bogus decls. At least on x86 and amd64. */
+extern void VG_(run_innerloop__dispatch_unprofiled);
+extern void VG_(run_innerloop__dispatch_profiled);
+#endif
+
#endif // __PUB_CORE_DISPATCH_H
=20
/*--------------------------------------------------------------------*/
|