|
From: <sv...@va...> - 2005-12-25 06:34:15
|
Author: njn
Date: 2005-12-25 06:34:04 +0000 (Sun, 25 Dec 2005)
New Revision: 5438
Log:
Merge in r5435 from COMPVBITS. Also added a note to
docs/internals/performance.txt about it.
Modified:
trunk/coregrind/m_execontext.c
trunk/coregrind/m_stacktrace.c
trunk/docs/internals/performance.txt
trunk/include/pub_tool_stacktrace.h
Modified: trunk/coregrind/m_execontext.c
===================================================================
--- trunk/coregrind/m_execontext.c 2005-12-25 06:30:34 UTC (rev 5437)
+++ trunk/coregrind/m_execontext.c 2005-12-25 06:34:04 UTC (rev 5438)
@@ -46,7 +46,8 @@
=20
struct _ExeContext {
struct _ExeContext * next;
- /* Variable-length array. The size is VG_(clo_backtrace_size); at
+ UInt n_ips;
+ /* Variable-length array. The size is 'n_ips'; at
least 1, at most VG_DEEPEST_BACKTRACE. [0] is the current IP,
[1] is its caller, [2] is the caller of [1], etc. */
Addr ips[0];
@@ -126,38 +127,42 @@
/* Print an ExeContext. */
void VG_(pp_ExeContext) ( ExeContext* ec )
{
- VG_(pp_StackTrace)( ec->ips, VG_(clo_backtrace_size) );
+ VG_(pp_StackTrace)( ec->ips, ec->n_ips );
}
=20
=20
/* Compare two ExeContexts, comparing all callers. */
Bool VG_(eq_ExeContext) ( VgRes res, ExeContext* e1, ExeContext* e2 )
{
+ Int i;
+
if (e1 =3D=3D NULL || e2 =3D=3D NULL)=20
return False;
+
+ // Must be at least one address in each trace.
+ tl_assert(e1->n_ips >=3D 1 && e2->n_ips >=3D 1);
+
switch (res) {
case Vg_LowRes:
/* Just compare the top two callers. */
ec_cmp2s++;
- if (e1->ips[0] !=3D e2->ips[0]) return False;
-
- if (VG_(clo_backtrace_size) < 2) return True;
- if (e1->ips[1] !=3D e2->ips[1]) return False;
+ for (i =3D 0; i < 2; i++) {
+ if ( (e1->n_ips <=3D i) && (e2->n_ips <=3D i)) return True;
+ if ( (e1->n_ips <=3D i) && !(e2->n_ips <=3D i)) return False;
+ if (!(e1->n_ips <=3D i) && (e2->n_ips <=3D i)) return False;
+ if (e1->ips[i] !=3D e2->ips[i]) return False;
+ }
return True;
=20
case Vg_MedRes:
/* Just compare the top four callers. */
ec_cmp4s++;
- if (e1->ips[0] !=3D e2->ips[0]) return False;
-
- if (VG_(clo_backtrace_size) < 2) return True;
- if (e1->ips[1] !=3D e2->ips[1]) return False;
-
- if (VG_(clo_backtrace_size) < 3) return True;
- if (e1->ips[2] !=3D e2->ips[2]) return False;
-
- if (VG_(clo_backtrace_size) < 4) return True;
- if (e1->ips[3] !=3D e2->ips[3]) return False;
+ for (i =3D 0; i < 4; i++) {
+ if ( (e1->n_ips <=3D i) && (e2->n_ips <=3D i)) return True;
+ if ( (e1->n_ips <=3D i) && !(e2->n_ips <=3D i)) return False;
+ if (!(e1->n_ips <=3D i) && (e2->n_ips <=3D i)) return False;
+ if (e1->ips[i] !=3D e2->ips[i]) return False;
+ }
return True;
=20
case Vg_HighRes:
@@ -188,18 +193,20 @@
UWord hash;
ExeContext* new_ec;
ExeContext* list;
+ UInt n_ips;
=20
init_ExeContext_storage();
- vg_assert(VG_(clo_backtrace_size) >=3D 1=20
- && VG_(clo_backtrace_size) <=3D VG_DEEPEST_BACKTRACE);
+ vg_assert(VG_(clo_backtrace_size) >=3D 1 &&
+ VG_(clo_backtrace_size) <=3D VG_DEEPEST_BACKTRACE);
=20
- VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+ n_ips =3D VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+ tl_assert(n_ips >=3D 1);
=20
/* Now figure out if we've seen this one before. First hash it so
as to determine the list number. */
=20
hash =3D 0;
- for (i =3D 0; i < VG_(clo_backtrace_size); i++) {
+ for (i =3D 0; i < n_ips; i++) {
hash ^=3D ips[i];
hash =3D (hash << 29) | (hash >> 3);
}
@@ -215,7 +222,7 @@
if (list =3D=3D NULL) break;
ec_searchcmps++;
same =3D True;
- for (i =3D 0; i < VG_(clo_backtrace_size); i++) {
+ for (i =3D 0; i < n_ips; i++) {
if (list->ips[i] !=3D ips[i]) {
same =3D False;
break;=20
@@ -234,13 +241,14 @@
ec_totstored++;
=20
new_ec =3D VG_(arena_malloc)( VG_AR_EXECTXT,=20
- sizeof(struct _ExeContext *)=20
- + VG_(clo_backtrace_size) * sizeof(Addr) =
);
+ sizeof(struct _ExeContext)=20
+ + n_ips * sizeof(Addr) );
=20
- for (i =3D 0; i < VG_(clo_backtrace_size); i++)
+ for (i =3D 0; i < n_ips; i++)
new_ec->ips[i] =3D ips[i];
=20
- new_ec->next =3D ec_list[hash];
+ new_ec->n_ips =3D n_ips;
+ new_ec->next =3D ec_list[hash];
ec_list[hash] =3D new_ec;
=20
return new_ec;
Modified: trunk/coregrind/m_stacktrace.c
===================================================================
--- trunk/coregrind/m_stacktrace.c 2005-12-25 06:30:34 UTC (rev 5437)
+++ trunk/coregrind/m_stacktrace.c 2005-12-25 06:34:04 UTC (rev 5438)
@@ -65,12 +65,9 @@
vg_assert(sizeof(Addr) =3D=3D sizeof(void*));
=20
/* Snaffle IPs from the client's stack into ips[0 .. n_ips-1],
- putting zeroes in when the trail goes cold, which we guess to be
+ stopping when the trail goes cold, which we guess to be
when FP is not a reasonable stack location. */
=20
- for (i =3D 0; i < n_ips; i++)
- ips[i] =3D 0;
-
// JRS 2002-sep-17: hack, to round up fp_max to the end of the
// current page, at least. Dunno if it helps.
// NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
Modified: trunk/docs/internals/performance.txt
===================================================================
--- trunk/docs/internals/performance.txt 2005-12-25 06:30:34 UTC (rev 5437)
+++ trunk/docs/internals/performance.txt 2005-12-25 06:34:04 UTC (rev 5438)
@@ -26,6 +26,9 @@
- Nick reduced the iteration count of the loop in swizzle() from 20 to 5,
which gave almost identical results while saving 2% in perf/tinycc and 10%
in perf/heap on a 3GHz Prescott P4.
+- Nick changed ExeContext gathering to not record/save extra zeroes at the
+ end. Saved 7% on perf/heap with --num-callers=50, and about 1% on
+ perf/tinycc.
=20
COMPVBITS branch:
- Nick converted to compress V bits, initial version saved 0--5% on most
Modified: trunk/include/pub_tool_stacktrace.h
===================================================================
--- trunk/include/pub_tool_stacktrace.h 2005-12-25 06:30:34 UTC (rev 5437)
+++ trunk/include/pub_tool_stacktrace.h 2005-12-25 06:34:04 UTC (rev 5438)
@@ -36,7 +36,8 @@
=20
// Walks the stack to get instruction pointers from the top stack frames for
// thread 'tid'. Maximum of 'n_ips' addresses put into 'ips'; 0 is the top
-// of the stack, 1 is its caller, etc.
+// of the stack, 1 is its caller, etc. Everything from ips[n_ips] onwards
+// is undefined and should not be read.
extern UInt VG_(get_StackTrace) ( ThreadId tid, StackTrace ips, UInt n_i=
ps );
=20
// Apply a function to every element in the StackTrace. The parameter 'n'
|