|
From: <sv...@va...> - 2005-12-25 06:25:33
|
Author: njn
Date: 2005-12-25 06:25:27 +0000 (Sun, 25 Dec 2005)
New Revision: 5435
Log:
Made ExeContext gathering more efficient.  Previously we were getting as
many code addresses as possible, and then filling the rest of the array (up
to VG_(clo_backtrace_size)) with zeroes.  These zero entries were then
included in the hashing, and comparisons, and space was allocated for them
in saved ExeContexts.
By not putting in the zeroes, not doing any hashing/comparisons of them, and
not storing them, we see speed-ups for perf/heap of 5% with
--num-callers=12 and 7% for --num-callers=50, and about 1.5% for tinycc.
The amount of memory for ExeContexts also drops, saving around 500KB for
tinycc with --num-callers=50.
I also changed the allocation of each ExeContext to use "sizeof(struct
_ExeContext)" instead of "sizeof(struct _ExeContext *)". It worked ok
up until now because the struct only contained a single pointer in the
non-variable-sized part, but it was an accident waiting to happen when
struct _ExeContext changed (and indeed did happen to me when I added
'n_ips' to the struct).
Modified:
branches/COMPVBITS/coregrind/m_execontext.c
branches/COMPVBITS/coregrind/m_stacktrace.c
branches/COMPVBITS/include/pub_tool_stacktrace.h
Modified: branches/COMPVBITS/coregrind/m_execontext.c
===================================================================
--- branches/COMPVBITS/coregrind/m_execontext.c 2005-12-25 03:33:12 UTC (rev 5434)
+++ branches/COMPVBITS/coregrind/m_execontext.c 2005-12-25 06:25:27 UTC (rev 5435)
@@ -47,7 +47,8 @@
 
struct _ExeContext {
struct _ExeContext * next;
- /* Variable-length array. The size is VG_(clo_backtrace_size); at
+ UInt n_ips;
+ /* Variable-length array. The size is 'n_ips'; at
least 1, at most VG_DEEPEST_BACKTRACE. [0] is the current IP,
[1] is its caller, [2] is the caller of [1], etc. */
Addr ips[0];
@@ -127,38 +128,42 @@
/* Print an ExeContext. */
void VG_(pp_ExeContext) ( ExeContext* ec )
{
- VG_(pp_StackTrace)( ec->ips, VG_(clo_backtrace_size) );
+ VG_(pp_StackTrace)( ec->ips, ec->n_ips );
}
 
 
/* Compare two ExeContexts, comparing all callers. */
Bool VG_(eq_ExeContext) ( VgRes res, ExeContext* e1, ExeContext* e2 )
{
+ Int i;
+
if (e1 == NULL || e2 == NULL)
return False;
+
+ // Must be at least one address in each trace.
+ tl_assert(e1->n_ips >= 1 && e2->n_ips >= 1);
+
switch (res) {
case Vg_LowRes:
/* Just compare the top two callers. */
ec_cmp2s++;
- if (e1->ips[0] != e2->ips[0]) return False;
-
- if (VG_(clo_backtrace_size) < 2) return True;
- if (e1->ips[1] != e2->ips[1]) return False;
+ for (i = 0; i < 2; i++) {
+ if ( (e1->n_ips <= i) && (e2->n_ips <= i)) return True;
+ if ( (e1->n_ips <= i) && !(e2->n_ips <= i)) return False;
+ if (!(e1->n_ips <= i) && (e2->n_ips <= i)) return False;
+ if (e1->ips[i] != e2->ips[i]) return False;
+ }
return True;
 
case Vg_MedRes:
/* Just compare the top four callers. */
ec_cmp4s++;
- if (e1->ips[0] != e2->ips[0]) return False;
-
- if (VG_(clo_backtrace_size) < 2) return True;
- if (e1->ips[1] != e2->ips[1]) return False;
-
- if (VG_(clo_backtrace_size) < 3) return True;
- if (e1->ips[2] != e2->ips[2]) return False;
-
- if (VG_(clo_backtrace_size) < 4) return True;
- if (e1->ips[3] != e2->ips[3]) return False;
+ for (i = 0; i < 4; i++) {
+ if ( (e1->n_ips <= i) && (e2->n_ips <= i)) return True;
+ if ( (e1->n_ips <= i) && !(e2->n_ips <= i)) return False;
+ if (!(e1->n_ips <= i) && (e2->n_ips <= i)) return False;
+ if (e1->ips[i] != e2->ips[i]) return False;
+ }
return True;
 
case Vg_HighRes:
@@ -189,20 +194,22 @@
UWord hash;
ExeContext* new_ec;
ExeContext* list;
+ UInt n_ips;
 
VGP_PUSHCC(VgpExeContext);
 
init_ExeContext_storage();
- vg_assert(VG_(clo_backtrace_size) >= 1
- && VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE);
+ vg_assert(VG_(clo_backtrace_size) >= 1 &&
+ VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE);
 
- VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+ n_ips = VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+ tl_assert(n_ips >= 1);
 
/* Now figure out if we've seen this one before. First hash it so
as to determine the list number. */
 
hash = 0;
- for (i = 0; i < VG_(clo_backtrace_size); i++) {
+ for (i = 0; i < n_ips; i++) {
hash ^= ips[i];
hash = (hash << 29) | (hash >> 3);
}
@@ -218,7 +225,7 @@
if (list == NULL) break;
ec_searchcmps++;
same = True;
- for (i = 0; i < VG_(clo_backtrace_size); i++) {
+ for (i = 0; i < n_ips; i++) {
if (list->ips[i] != ips[i]) {
same = False;
break;
@@ -238,13 +245,14 @@
ec_totstored++;
 
new_ec = VG_(arena_malloc)( VG_AR_EXECTXT,
- sizeof(struct _ExeContext *)
- + VG_(clo_backtrace_size) * sizeof(Addr) );
+ sizeof(struct _ExeContext)
+ + n_ips * sizeof(Addr) );
 
- for (i = 0; i < VG_(clo_backtrace_size); i++)
+ for (i = 0; i < n_ips; i++)
new_ec->ips[i] = ips[i];
 
- new_ec->next = ec_list[hash];
+ new_ec->n_ips = n_ips;
+ new_ec->next = ec_list[hash];
ec_list[hash] = new_ec;
 
VGP_POPCC(VgpExeContext);
Modified: branches/COMPVBITS/coregrind/m_stacktrace.c
===================================================================
--- branches/COMPVBITS/coregrind/m_stacktrace.c 2005-12-25 03:33:12 UTC (rev 5434)
+++ branches/COMPVBITS/coregrind/m_stacktrace.c 2005-12-25 06:25:27 UTC (rev 5435)
@@ -68,12 +68,9 @@
vg_assert(sizeof(Addr) == sizeof(void*));
 
/* Snaffle IPs from the client's stack into ips[0 .. n_ips-1],
- putting zeroes in when the trail goes cold, which we guess to be
+ stopping when the trail goes cold, which we guess to be
when FP is not a reasonable stack location. */
 
- for (i = 0; i < n_ips; i++)
- ips[i] = 0;
-
// JRS 2002-sep-17: hack, to round up fp_max to the end of the
// current page, at least. Dunno if it helps.
// NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
Modified: branches/COMPVBITS/include/pub_tool_stacktrace.h
===================================================================
--- branches/COMPVBITS/include/pub_tool_stacktrace.h 2005-12-25 03:33:12 UTC (rev 5434)
+++ branches/COMPVBITS/include/pub_tool_stacktrace.h 2005-12-25 06:25:27 UTC (rev 5435)
@@ -36,7 +36,8 @@
 
// Walks the stack to get instruction pointers from the top stack frames for
// thread 'tid'. Maximum of 'n_ips' addresses put into 'ips'; 0 is the top
-// of the stack, 1 is its caller, etc.
+// of the stack, 1 is its caller, etc. Everything from ips[n_ips] onwards
+// is undefined and should not be read.
extern UInt VG_(get_StackTrace) ( ThreadId tid, StackTrace ips, UInt n_ips );
 
// Apply a function to every element in the StackTrace. The parameter 'n'
|