|
From: <sv...@va...> - 2007-08-28 06:05:57
|
Author: sewardj
Date: 2007-08-28 07:05:20 +0100 (Tue, 28 Aug 2007)
New Revision: 6787
Log:
Merge, from CGTUNE branch, a cleaned up version of r6742:
Another optimisation: allow tools to provide a final_tidy function
which they can use to mess with the final post-tree-built IR before it
is handed off to instruction selection.
In memcheck, use this to remove redundant calls to
MC_(helperc_value_check0_fail) et al. Gives a 6% reduction in code
size for Memcheck on x86 and a smaller (3% ?) speedup.
Modified:
trunk/coregrind/m_tooliface.c
trunk/coregrind/m_translate.c
trunk/coregrind/pub_core_tooliface.h
trunk/include/pub_tool_tooliface.h
trunk/memcheck/mc_include.h
trunk/memcheck/mc_main.c
trunk/memcheck/mc_translate.c
Modified: trunk/coregrind/m_tooliface.c
===================================================================
--- trunk/coregrind/m_tooliface.c 2007-08-28 05:19:16 UTC (rev 6786)
+++ trunk/coregrind/m_tooliface.c 2007-08-28 06:05:20 UTC (rev 6787)
@@ -94,6 +94,7 @@
.data_syms = False,
.malloc_replacement = False,
.xml_output = False,
+ .final_IR_tidy_pass = False
};
/* static */
@@ -260,6 +261,13 @@
VG_(tdict).tool_client_redzone_szB = client_malloc_redzone_szB;
}
+void VG_(needs_final_IR_tidy_pass)(
+ IRSB*(*final_tidy)(IRSB*)
+)
+{
+ VG_(needs).final_IR_tidy_pass = True;
+ VG_(tdict).tool_final_IR_tidy_pass = final_tidy;
+}
/*--------------------------------------------------------------------*/
/* Tracked events */
Modified: trunk/coregrind/m_translate.c
===================================================================
--- trunk/coregrind/m_translate.c 2007-08-28 05:19:16 UTC (rev 6786)
+++ trunk/coregrind/m_translate.c 2007-08-28 06:05:20 UTC (rev 6787)
@@ -1287,6 +1287,9 @@
vta.instrument2 = need_to_handle_SP_assignment()
? vg_SP_update_pass
: NULL;
+ vta.finaltidy = VG_(needs).final_IR_tidy_pass
+ ? VG_(tdict).tool_final_IR_tidy_pass
+ : NULL;
vta.do_self_check = do_self_check;
vta.traceflags = verbosity;
Modified: trunk/coregrind/pub_core_tooliface.h
===================================================================
--- trunk/coregrind/pub_core_tooliface.h 2007-08-28 05:19:16 UTC (rev 6786)
+++ trunk/coregrind/pub_core_tooliface.h 2007-08-28 06:05:20 UTC (rev 6787)
@@ -91,6 +91,7 @@
Bool data_syms;
Bool malloc_replacement;
Bool xml_output;
+ Bool final_IR_tidy_pass;
}
VgNeeds;
@@ -155,6 +156,9 @@
void* (*tool_realloc) (ThreadId, void*, SizeT);
SizeT tool_client_redzone_szB;
+ // VG_(needs).final_IR_tidy_pass
+ IRSB* (*tool_final_IR_tidy_pass) (IRSB*);
+
// -- Event tracking functions ------------------------------------
void (*track_new_mem_startup) (Addr, SizeT, Bool, Bool, Bool);
void (*track_new_mem_stack_signal)(Addr, SizeT);
Modified: trunk/include/pub_tool_tooliface.h
===================================================================
--- trunk/include/pub_tool_tooliface.h 2007-08-28 05:19:16 UTC (rev 6786)
+++ trunk/include/pub_tool_tooliface.h 2007-08-28 06:05:20 UTC (rev 6787)
@@ -438,6 +438,12 @@
* it". */
extern void VG_(needs_xml_output)( void );
+/* Does the tool want to have one final pass over the IR after tree
+ building but before instruction selection? If so specify the
+ function here. */
+extern void VG_(needs_final_IR_tidy_pass) ( IRSB*(*final_tidy)(IRSB*) );
+
+
/* ------------------------------------------------------------------ */
/* Core events to track */
Modified: trunk/memcheck/mc_include.h
===================================================================
--- trunk/memcheck/mc_include.h 2007-08-28 05:19:16 UTC (rev 6786)
+++ trunk/memcheck/mc_include.h 2007-08-28 06:05:20 UTC (rev 6787)
@@ -320,6 +320,9 @@
VexGuestExtents* vge,
IRType gWordTy, IRType hWordTy );
+extern
+IRSB* MC_(final_tidy) ( IRSB* );
+
#endif /* ndef __MC_INCLUDE_H */
/*--------------------------------------------------------------------*/
Modified: trunk/memcheck/mc_main.c
===================================================================
--- trunk/memcheck/mc_main.c 2007-08-28 05:19:16 UTC (rev 6786)
+++ trunk/memcheck/mc_main.c 2007-08-28 06:05:20 UTC (rev 6787)
@@ -4969,6 +4969,9 @@
MC_(instrument),
mc_fini);
+ VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
+
+
VG_(needs_core_errors) ();
VG_(needs_tool_errors) (mc_eq_Error,
mc_pp_Error,
Modified: trunk/memcheck/mc_translate.c
===================================================================
--- trunk/memcheck/mc_translate.c 2007-08-28 05:19:16 UTC (rev 6786)
+++ trunk/memcheck/mc_translate.c 2007-08-28 06:05:20 UTC (rev 6787)
@@ -37,6 +37,9 @@
#include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
#include "mc_include.h"
+#include "pub_tool_xarray.h"
+#include "pub_tool_mallocfree.h"
+#include "pub_tool_libcbase.h"
/* This file implements the Memcheck instrumentation, and in
particular contains the core of its undefined value detection
@@ -3523,6 +3526,149 @@
return bb;
}
+/*------------------------------------------------------------*/
+/*--- Post-tree-build final tidying ---*/
+/*------------------------------------------------------------*/
+
+/* This exploits the observation that Memcheck often produces
+ repeated conditional calls of the form
+
+ Dirty G MC_(helperc_value_check0/1/4/8_fail)()
+
+ with the same guard expression G guarding the same helper call.
+ The second and subsequent calls are redundant. This usually
+ results from instrumentation of guest code containing multiple
+ memory references at different constant offsets from the same base
+ register. After optimisation of the instrumentation, you get a
+ test for the definedness of the base register for each memory
+ reference, which is kinda pointless. MC_(final_tidy) therefore
+ looks for such repeated calls and removes all but the first. */
+
+/* A struct for recording which (helper, guard) pairs we have already
+ seen. */
+typedef
+ struct { void* entry; IRExpr* guard; }
+ Pair;
+
+/* Return True if e1 and e2 definitely denote the same value (used to
+ compare guards). Return False if unknown; False is the safe
+ answer. Since guest registers and guest memory do not have the
+ SSA property we must return False if any Gets or Loads appear in
+ the expression. */
+
+static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
+{
+ if (e1->tag != e2->tag)
+ return False;
+ switch (e1->tag) {
+ case Iex_Const:
+ return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
+ case Iex_Binop:
+ return e1->Iex.Binop.op == e2->Iex.Binop.op
+ && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
+ && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
+ case Iex_Unop:
+ return e1->Iex.Unop.op == e2->Iex.Unop.op
+ && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
+ case Iex_RdTmp:
+ return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
+ case Iex_Mux0X:
+ return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
+ && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
+ && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
+ case Iex_Qop:
+ case Iex_Triop:
+ case Iex_CCall:
+ /* be lazy. Could define equality for these, but they never
+ appear to be used. */
+ return False;
+ case Iex_Get:
+ case Iex_GetI:
+ case Iex_Load:
+ /* be conservative - these may not give the same value each
+ time */
+ return False;
+ case Iex_Binder:
+ /* should never see this */
+ /* fallthrough */
+ default:
+ VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
+ ppIRExpr(e1);
+ VG_(tool_panic)("memcheck:sameIRValue");
+ return False;
+ }
+}
+
+/* See if 'pairs' already has an entry for (entry, guard). Return
+ True if so. If not, add an entry. */
+
+static
+Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
+{
+ Pair p;
+ Pair* pp;
+ Int i, n = VG_(sizeXA)( pairs );
+ for (i = 0; i < n; i++) {
+ pp = VG_(indexXA)( pairs, i );
+ if (pp->entry == entry && sameIRValue(pp->guard, guard))
+ return True;
+ }
+ p.guard = guard;
+ p.entry = entry;
+ VG_(addToXA)( pairs, &p );
+ return False;
+}
+
+static Bool is_helperc_value_checkN_fail ( HChar* name )
+{
+ return
+ 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail)")
+ || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail)")
+ || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail)")
+ || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail)");
+}
+
+IRSB* MC_(final_tidy) ( IRSB* sb_in )
+{
+ Int i;
+ IRStmt* st;
+ IRDirty* di;
+ IRExpr* guard;
+ IRCallee* cee;
+ Bool alreadyPresent;
+ XArray* pairs = VG_(newXA)( VG_(malloc), VG_(free), sizeof(Pair) );
+ /* Scan forwards through the statements. Each time a call to one
+ of the relevant helpers is seen, check if we have made a
+ previous call to the same helper using the same guard
+ expression, and if so, delete the call. */
+ for (i = 0; i < sb_in->stmts_used; i++) {
+ st = sb_in->stmts[i];
+ tl_assert(st);
+ if (st->tag != Ist_Dirty)
+ continue;
+ di = st->Ist.Dirty.details;
+ guard = di->guard;
+ if (!guard)
+ continue;
+ if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
+ cee = di->cee;
+ if (!is_helperc_value_checkN_fail( cee->name ))
+ continue;
+ /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
+ guard 'guard'. Check if we have already seen a call to this
+ function with the same guard. If so, delete it. If not,
+ add it to the set of calls we do know about. */
+ alreadyPresent = check_or_add( pairs, guard, cee->addr );
+ if (alreadyPresent) {
+ sb_in->stmts[i] = IRStmt_NoOp();
+ if (0) VG_(printf)("XX\n");
+ }
+ }
+ VG_(deleteXA)( pairs );
+ return sb_in;
+}
+
+
/*--------------------------------------------------------------------*/
/*--- end mc_translate.c ---*/
/*--------------------------------------------------------------------*/
|
|
From: Oswald B. <os...@kd...> - 2007-08-28 07:35:04
|
On Tue, Aug 28, 2007 at 07:05:22AM +0100, sv...@va... wrote:
> Author: sewardj
> +static Bool is_helperc_value_checkN_fail ( HChar* name )
> +{
> + return
> + 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail)")
> + || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail)")
> + || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail)")
> + || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail)");
> +}
>
This looks incredibly slow to me, at least when partial matches are
expected to occur from time to time. How about using the address of a
globally stored instance of each string as the key? Or just the address of
the function itself?
--
Hi! I'm a .signature virus! Copy me into your ~/.signature, please!
--
Chaos, panic, and disorder - my work here is done.
|
|
From: Nicholas N. <nj...@cs...> - 2007-08-28 11:20:02
|
On Tue, 28 Aug 2007, Oswald Buddenhagen wrote:
>> Author: sewardj
>> +static Bool is_helperc_value_checkN_fail ( HChar* name )
>> +{
>> + return
>> + 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail)")
>> + || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail)")
>> + || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail)")
>> + || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail)");
>> +}
>>
> this looks incredibly slow to me, at least when partial matches are
> expected to occur from time to time. how about using the address of a
> globally stored instance of each string as a key? or just the address of
> the function itself?
But it's not called very often.
Nick
|
|
From: Julian S. <js...@ac...> - 2007-08-28 11:33:49
|
On Tuesday 28 August 2007 12:18, Nicholas Nethercote wrote:
> On Tue, 28 Aug 2007, Oswald Buddenhagen wrote:
> >> Author: sewardj
> >> +static Bool is_helperc_value_checkN_fail ( HChar* name )
> >> +{
> >> + return
> >> + 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail)")
> >> + || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail)")
> >> + || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail)")
> >> + || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail)");
> >> +}
> >
> > this looks incredibly slow to me, at least when partial matches are
> > expected to occur from time to time. how about using the address of a
> > globally stored instance of each string as a key? or just the address of
> > the function itself?
>
> But it's not called very often.
Indeed, on an OProfile'd run of Memcheck starting konqueror, I cannot
see this in the profile at all. Not only is it not called very often,
it tends to pay for itself fairly quickly by improving the quality of the
code emitted by the JIT.
J
|