|
From: Jeremy F. <je...@go...> - 2005-02-01 22:52:59
|
CVS commit by fitzhardinge:
Add option to call helper functions with a direct call rather than
indirectly via the baseBlock.
M +10 -3 core.h 1.73
M +86 -53 vg_from_ucode.c 1.86
M +3 -0 vg_main.c 1.240
M +11 -9 vg_translate.c 1.96
M +10 -4 vg_transtab.c 1.38
--- valgrind/coregrind/core.h #1.72:1.73
@@ -301,4 +301,6 @@ extern Bool VG_(clo_run_libc_freeres);
/* Use the basic-block chaining optimisation? Default: YES */
extern Bool VG_(clo_chain_bb);
+/* Use direct jumps to helper functions? Default: YES */
+extern Bool VG_(clo_direct_helpers);
/* Generate branch-prediction hints? */
extern Bool VG_(clo_branchpred);
@@ -999,5 +1001,5 @@ extern void VG_(demangle) ( Char* orig,
------------------------------------------------------------------ */
-extern UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes, UShort jumps[VG_MAX_JUMPS] );
+extern UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes );
extern void VG_(print_ccall_stats) ( void );
@@ -1005,4 +1007,5 @@ extern void VG_(print_UInstr_histogram
extern void VG_(unchain_jumpsite) ( Addr jumpsite );
+extern void VG_(reloc_abs_jump) ( UChar *jmp );
extern Addr VG_(get_jmp_dest) ( Addr jumpsite );
@@ -1026,4 +1029,8 @@ struct _UCodeBlock {
UInstr* instrs;
Int nextTemp;
+
+ UShort *relocs;
+ Int nrelocs;
+ UShort jumps[VG_MAX_JUMPS];
};
@@ -1645,5 +1652,5 @@ extern void VG_(init_tt_tc) ( void
extern void VG_(add_to_trans_tab) ( Addr orig_addr, Int orig_size,
Addr trans_addr, Int trans_size,
- UShort jumps[VG_MAX_JUMPS]);
+ UCodeBlock *cb );
extern Addr VG_(search_transtab) ( Addr original_addr );
@@ -1676,5 +1683,5 @@ extern UInt VG_(run_innerloop) ( void );
/* The patching routing called when a BB wants to chain itself to
another. */
-extern UInt VG_(patch_me);
+extern UChar VG_(patch_me)[];
/* ---------------------------------------------------------------------
--- valgrind/coregrind/vg_from_ucode.c #1.85:1.86
@@ -44,15 +44,5 @@
/*------------------------------------------------------------*/
-/* [2001-07-08 This comment is now somewhat out of date.]
-
- This is straightforward but for one thing: to facilitate generating
- code in a single pass, we generate position-independent code. To
- do this, calls and jmps to fixed addresses must specify the address
- by first loading it into a register, and jump to/call that
- register. Fortunately, the only jump to a literal is the jump back
- to vg_dispatch, and only %eax is live then, conveniently. UCode
- call insns may only have a register as target anyway, so there's no
- need to do anything fancy for them.
-
+/*
The emit_* routines constitute the lowest level of instruction
emission. They simply emit the sequence of bytes corresponding to
@@ -64,5 +54,6 @@
routines. These detect impossible operand combinations and turn
them into sequences of legal instructions. Finally, emitUInstr is
- phrased in terms of the synth_* abstraction layer. */
+ phrased in terms of the synth_* abstraction layer.
+*/
/* Static state for the current basic block */
@@ -71,8 +62,13 @@ static Int emitted_code_used;
static Int emitted_code_size;
-/* offset (in bytes into the basic block) */
+/* offset of jump sites (in bytes into the basic block) */
static UShort jumps[VG_MAX_JUMPS];
static Int jumpidx;
+/* offset of addresses which need relocation (absolute->relative) */
+static UShort *relocs; /* array of relocation offsets */
+static Int nrelocs; /* number of relocations */
+static Int relocsz; /* size of reloc array */
+
static enum _eflags_state {
UPD_Simd, /* baseblock copy is up to date */
@@ -112,6 +108,26 @@ static void reset_state(void)
jumpidx = 0;
eflags_state = UPD_Simd;
+
+ relocs = NULL;
+ nrelocs = relocsz = 0;
}
+static void add_reloc(UShort off)
+{
+ if (nrelocs == relocsz) {
+ if (relocsz == 0) {
+ vg_assert(relocs == NULL);
+ relocsz = 2;
+ relocs = VG_(arena_malloc)(VG_AR_JITTER, sizeof(*relocs) * relocsz);
+ } else {
+ vg_assert(relocs != NULL);
+ relocsz *= 2;
+ relocs = VG_(arena_realloc)(VG_AR_JITTER, relocs, VG_MIN_MALLOC_SZB,
+ sizeof(*relocs) * relocsz);
+ }
+ }
+ vg_assert(nrelocs < relocsz);
+ relocs[nrelocs++] = off;
+}
/* Statistics about C functions called from generated code. */
@@ -1964,4 +1980,19 @@ void VG_(emit_call_reg) ( Int reg )
static
+void emit_call_abs(Bool simd_flags, Addr target,
+ FlagSet use_flag, FlagSet set_flag)
+{
+ /* Used for helpers which expect to see Simd flags in Real flags */
+ VG_(new_emit)(simd_flags, use_flag, set_flag);
+
+ add_reloc(emitted_code_used);
+ VG_(emitB)(0xE8);
+ VG_(emitL)(target);
+
+ if (dis)
+ VG_(printf)( "\n\t\tcall %p\n", target );
+}
+
+static
void emit_call_star_EBP_off ( Bool simd_flags, Int byte_off,
FlagSet use_flag, FlagSet set_flag )
@@ -2181,21 +2212,10 @@ static Bool is_chained_jumpsite(Addr a)
}
-static
-Bool is_fresh_jumpsite(UChar *cp)
-{
- return
- cp[0] == 0x0F && /* UD2 */
- cp[1] == 0x0B &&
- cp[2] == 0x0F && /* UD2 */
- cp[3] == 0x0B &&
- cp[4] == 0x90; /* NOP */
-}
-
-/* Predicate used in sanity checks elsewhere - returns true if all
- jump-sites are calls to VG_(patch_me) */
+/* Predicate used in sanity checks elsewhere - returns true if the
+ jump-site is a call to VG_(patch_me) */
static Bool is_unchained_jumpsite(Addr a)
{
UChar *cp = (UChar *)a;
- Int delta = ((Addr)&VG_(patch_me)) - (a + VG_PATCHME_CALLSZ);
+ Int delta = ((Addr)VG_(patch_me)) - (a + VG_PATCHME_CALLSZ);
Int idelta;
@@ -2231,5 +2251,5 @@ Addr VG_(get_jmp_dest)(Addr a)
void VG_(unchain_jumpsite)(Addr a)
{
- Int delta = ((Addr)&VG_(patch_me)) - (a + VG_PATCHME_CALLSZ);
+ Int delta = ((Addr)VG_(patch_me)) - (a + VG_PATCHME_CALLSZ);
UChar *cp = (UChar *)a;
@@ -2237,5 +2257,4 @@ void VG_(unchain_jumpsite)(Addr a)
return; /* don't write unnecessarily */
- if (!is_fresh_jumpsite(cp))
VG_(bb_dechain_count)++; /* update stats */
@@ -2247,11 +2266,22 @@ void VG_(unchain_jumpsite)(Addr a)
}
-/* This doesn't actually generate a call to VG_(patch_me), but
- reserves enough space in the instruction stream for it to happen
- and records the offset into the jump table. This is because call
- is a relative jump, and so will be affected when this code gets
- moved about. The translation table will "unchain" this basic block
- on insertion (with VG_(unchain_BB)()), and thereby generate a
- proper call instruction. */
+/* Relocate a direct call or jmp; the address is initially an
+   absolute address; this replaces it with a relative offset. */
+void VG_(reloc_abs_jump)(UChar *instr)
+{
+ Addr *absaddr = (Addr *)(instr+1);
+ Int delta = *absaddr - (Addr)(absaddr + 1);
+
+ vg_assert(*instr == 0xE8 || /* call */
+ *instr == 0xEB); /* jmp */
+
+ *absaddr = delta;
+}
+
+/* Generate a call to VG_(patch_me); this will be properly relocated
+ when this generated code is inserted into the translation cache.
+ If we hit the limit of chained jumps per basic block, then just
+ generate the normal unchained sequence (return to the dispatch
+ loop). */
static void emit_call_patchme( void )
{
@@ -2279,12 +2309,5 @@ static void emit_call_patchme( void )
jumps[jumpidx++] = emitted_code_used;
- VG_(emitB) ( 0x0F ); /* UD2 - undefined instruction */
- VG_(emitB) ( 0x0B );
- VG_(emitB) ( 0x0F ); /* UD2 - undefined instruction */
- VG_(emitB) ( 0x0B );
- VG_(emitB) ( 0x90 ); /* NOP */
-
- if (dis)
- VG_(printf)("\n\t\tud2; ud2; nop /* call VG_(patch_me) */\n");
+ emit_call_abs(False, (Addr)VG_(patch_me), FlagsEmpty, FlagsEmpty);
}
}
@@ -2349,6 +2372,5 @@ static Condcode invertCondition ( Condco
-/* Synthesise a call to *baseBlock[offset], ie,
- call * (4 x offset)(%ebp).
+/* Synthesise a call to a helper.
*/
void VG_(synth_call) ( Bool ensure_shortform, Int word_offset,
@@ -2360,5 +2382,13 @@ void VG_(synth_call) ( Bool ensure_short
vg_assert(word_offset < 32);
}
+ if (VG_(clo_direct_helpers)) {
+ /* Call the helper directly */
+ Addr target = VG_(baseBlock)[word_offset];
+ emit_call_abs ( simd_flags, target, use_flags, set_flags );
+ } else {
+ /* Call the helper via the baseBlock: *baseBlock[offset], ie,
+ call * (4 x offset)(%ebp). */
emit_call_star_EBP_off ( simd_flags, 4 * word_offset, use_flags, set_flags );
+ }
}
@@ -4453,6 +4483,5 @@ static void emitUInstr ( UCodeBlock* cb,
generated code and setting *nbytes to its size. */
UChar* VG_(emit_code) ( UCodeBlock* cb,
- Int* nbytes,
- UShort j[VG_MAX_JUMPS] )
+ Int* nbytes )
{
Int i;
@@ -4503,8 +4532,12 @@ UChar* VG_(emit_code) ( UCodeBlock* cb,
vg_assert(eflags_state != UPD_Real); /* flags can't just be in CPU */
- vg_assert(NULL != j);
vg_assert(jumpidx <= VG_MAX_JUMPS);
for(i = 0; i < jumpidx; i++)
- j[i] = jumps[i];
+ cb->jumps[i] = jumps[i];
+
+ cb->relocs = relocs;
+ cb->nrelocs = nrelocs;
+ relocs = NULL;
+ nrelocs = relocsz = 0;
/* Returns a pointer to the emitted code. This will have to be
--- valgrind/coregrind/vg_translate.c #1.95:1.96
@@ -72,4 +72,6 @@ static UCodeBlock* alloc_UCodeBlock ( Ad
{
UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
+ Int i;
+
cb->orig_eip = orig_eip;
cb->used = 0;
@@ -77,4 +79,8 @@ static UCodeBlock* alloc_UCodeBlock ( Ad
cb->instrs = NULL;
cb->nextTemp = 0;
+ cb->relocs = NULL;
+ cb->nrelocs = 0;
+ for(i = 0; i < VG_MAX_JUMPS; i++)
+ cb->jumps[i] = -1;
return cb;
}
@@ -91,4 +97,5 @@ void VG_(free_UCodeBlock) ( UCodeBlock*
{
if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
+ if (cb->relocs) VG_(arena_free)(VG_AR_JITTER, cb->relocs);
VG_(arena_free)(VG_AR_CORE, cb);
}
@@ -2433,6 +2440,5 @@ Bool VG_(translate) ( ThreadId tid, Addr
{
Addr trans_addr, redir, orig_addr0 = orig_addr;
- UShort jumps[VG_MAX_JUMPS];
- Int i, orig_size, trans_size;
+ Int orig_size, trans_size;
UCodeBlock* cb;
Bool notrace_until_done;
@@ -2445,7 +2451,4 @@ Bool VG_(translate) ( ThreadId tid, Addr
beforeLiveness = True;
- for (i = 0; i < VG_MAX_JUMPS; i++)
- jumps[i] = (UShort)-1;
-
/* Look in the code redirect table to see if we should
translate an alternative address for orig_addr. */
@@ -2557,7 +2560,6 @@ Bool VG_(translate) ( ThreadId tid, Addr
VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
VGP_PUSHCC(VgpFromUcode);
- trans_addr = (Addr)VG_(emit_code)(cb, &trans_size, jumps );
+ trans_addr = (Addr)VG_(emit_code)(cb, &trans_size );
VGP_POPCC(VgpFromUcode);
- VG_(free_UCodeBlock)(cb);
#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
@@ -2573,10 +2575,10 @@ Bool VG_(translate) ( ThreadId tid, Addr
// Note that we use orig_addr0, not orig_addr, which might have been
// changed by the redirection
- VG_(add_to_trans_tab)( orig_addr0, orig_size, trans_addr, trans_size,
- jumps );
+ VG_(add_to_trans_tab)( orig_addr0, orig_size, trans_addr, trans_size, cb );
}
/* Free the intermediary -- was allocated by VG_(emit_code). */
VG_(arena_free)( VG_AR_JITTER, (void*)trans_addr );
+ VG_(free_UCodeBlock)(cb);
VGP_POPCC(VgpTranslate);
--- valgrind/coregrind/vg_transtab.c #1.37:1.38
@@ -559,5 +559,5 @@ Int VG_(get_bbs_translated) ( void )
void VG_(add_to_trans_tab) ( Addr orig_addr, Int orig_size,
Addr trans_addr, Int trans_size,
- UShort jumps[VG_MAX_JUMPS])
+ UCodeBlock *cb )
{
Int i, nBytes, trans_size_aligned;
@@ -569,4 +569,8 @@ void VG_(add_to_trans_tab) ( Addr orig_a
*/
+ if (VG_(clo_trace_codegen) & 1)
+ VG_(message)(Vg_DebugMsg, "add_to_trans_tab: inserting code for %p into transtab at %p\n",
+ orig_addr, trans_addr);
+
vg_assert(offsetof(TCEntry, payload) == VG_CODE_OFFSET);
@@ -587,5 +591,5 @@ void VG_(add_to_trans_tab) ( Addr orig_a
tce->trans_size = (UShort)trans_size_aligned;
for (i = 0; i < VG_MAX_JUMPS; i++) {
- tce->jump_sites[i] = jumps[i];
+ tce->jump_sites[i] = cb->jumps[i];
}
for (i = 0; i < trans_size; i++) {
@@ -593,5 +597,7 @@ void VG_(add_to_trans_tab) ( Addr orig_a
}
- unchain_tce(tce);
+ /* relocate absolute jumps */
+ for(i = 0; i < cb->nrelocs; i++)
+ VG_(reloc_abs_jump)(&tce->payload[cb->relocs[i]]);
add_tt_entry(tce);
--- valgrind/coregrind/vg_main.c #1.239:1.240
@@ -1500,4 +1500,5 @@ Bool VG_(clo_run_libc_freeres) = True;
Bool VG_(clo_track_fds) = False;
Bool VG_(clo_chain_bb) = True;
+Bool VG_(clo_direct_helpers) = True;
Bool VG_(clo_show_below_main) = False;
Bool VG_(clo_pointercheck) = True;
@@ -1558,4 +1559,5 @@ void usage ( Bool debug_help )
" --profile=no|yes profile? (tool must be built for it) [no]\n"
" --chain-bb=no|yes do basic-block chaining? [yes]\n"
+" --direct-helpers=yes|no call helpers directly? [yes]\n"
" --branchpred=yes|no generate branch prediction hints [no]\n"
" --trace-codegen=<XXXXX> show generated code? (X = 0|1) [00000]\n"
@@ -1715,4 +1717,5 @@ static void process_cmd_line_options( UI
else VG_BOOL_CLO("--branchpred", VG_(clo_branchpred))
else VG_BOOL_CLO("--chain-bb", VG_(clo_chain_bb))
+ else VG_BOOL_CLO("--direct-helpers", VG_(clo_direct_helpers))
else VG_BOOL_CLO("--db-attach", VG_(clo_db_attach))
else VG_BOOL_CLO("--demangle", VG_(clo_demangle))
|