|
From: <sv...@va...> - 2007-10-17 03:31:32
|
Author: njn
Date: 2007-10-17 04:31:32 +0100 (Wed, 17 Oct 2007)
New Revision: 7010
Log:
Moved main-or-below-main filtering to the end, avoiding *many* calls to
VG_(get_fnname). This reduced konqueror startup time from 3.5 minutes to 36
seconds.
Modified:
branches/MASSIF2/massif/ms_main.c
branches/MASSIF2/massif/tests/culling1.stderr.exp
branches/MASSIF2/massif/tests/culling2.stderr.exp
branches/MASSIF2/massif/tests/deep-B.stderr.exp
branches/MASSIF2/massif/tests/deep-C.stderr.exp
branches/MASSIF2/massif/tests/deep-D.post.exp
branches/MASSIF2/massif/tests/deep.c
branches/MASSIF2/massif/tests/realloc.stderr.exp
Modified: branches/MASSIF2/massif/ms_main.c
===================================================================
--- branches/MASSIF2/massif/ms_main.c 2007-10-16 23:18:06 UTC (rev 7009)
+++ branches/MASSIF2/massif/ms_main.c 2007-10-17 03:31:32 UTC (rev 7010)
@@ -73,7 +73,15 @@
// many-xpts 0.05s ma: 1.6s (33.0x, -----)
// konqueror 3:45 real 3:35 user
//
+// Moved main-or-below-main filtering to the end, avoiding *many* calls to
+// VG_(get_fnname):
+// heap 0.62s ma:12.4s (20.0x, -----)
+// tinycc 0.45s ma: 5.1s (11.4x, -----)
+// many-xpts 0.09s ma: 2.1s (23.8x, -----)
+// konqueror 0:46 real 0:36 user
//
+//
+//
// Todo:
// - for regtests, need to filter out code addresses in *.post.* files
// - do snapshots on client requests
@@ -809,79 +817,43 @@
Char buf[BUF_LEN];
Int n_ips, i, n_alloc_fns_removed;
Int overestimate;
- Bool fewer_IPs_than_asked_for = False;
- Bool removed_below_main = False;
- Bool enough_IPs_after_filtering = False;
+ Bool redo;
- // XXX: get this properly
- Bool should_hide_below_main = /*!VG_(clo_show_below_main)*/True;
-
// We ask for a few more IPs than clo_depth suggests we need. Then we
- // remove every entry that is an alloc-fn or above an alloc-fn, and
- // remove anything below main-or-below-main functions. Depending on the
+ // remove every entry that is an alloc-fn. Depending on the
// circumstances, we may need to redo it all, asking for more IPs.
// Details:
- // - If the original stack trace is smaller than asked-for, redo=False
- // - Else if we see main-or-below-main in the stack trace, redo=False
- // - Else if after filtering we have more than clo_depth IPs, redo=False
+ // - If the original stack trace is smaller than asked-for, redo=False
+ // - Else if after filtering we have >= clo_depth IPs, redo=False
// - Else redo=True
// In other words, to redo, we'd have to get a stack trace as big as we
- // asked for, remove more than 'overestimate' alloc-fns, and not hit
- // main-or-below-main.
- //
- // Nb: it's possible that an alloc-fn may be found in the overestimate
- // portion, in which case the trace will be shrunk, even though it
- // arguably shouldn't. But it would require a very large chain of
- // alloc-fns, and the best behaviour isn't all that clear, so we don't
- // worry about it.
+ // asked for and remove more than 'overestimate' alloc-fns.
- // Main loop
- for (overestimate = 3; True; overestimate += 6) {
+ // Main loop.
+ redo = True; // Assume this to begin with.
+ for (overestimate = 3; redo; overestimate += 6) {
// This should never happen -- would require MAX_OVERESTIMATE
// alloc-fns to be removed from the stack trace.
if (overestimate > MAX_OVERESTIMATE)
VG_(tool_panic)("get_IPs: ips[] too small, inc. MAX_OVERESTIMATE?");
- // Ask for some more IPs than clo_depth suggests we need.
+ // Ask for more IPs than clo_depth suggests we need.
n_ips = VG_(get_StackTrace)( tid, ips, clo_depth + overestimate );
tl_assert(n_ips > 0);
- // If we got fewer IPs than we asked for, redo=False
- if (n_ips < clo_depth + overestimate) {
- fewer_IPs_than_asked_for = True;
- }
+ // If the original stack trace is smaller than asked-for, redo=False.
+ if (n_ips < clo_depth + overestimate) { redo = False; }
- // Filter out entries that are below main, if necessary.
- // XXX: stats -- should record how often this happens.
- // XXX: look at the "(below main)"/"__libc_start_main" mess
- // (m_stacktrace.c and m_demangle.c). Don't hard-code "(below
- // main)" in here.
- // [Nb: Josef wants --show-below-main to work for his fn entry/exit
- // tracing]
- if (should_hide_below_main) {
- for (i = n_ips-1; i >= 0; i--) {
- if (VG_(get_fnname)(ips[i], buf, BUF_LEN)) {
- if (VG_STREQ(buf, "main")) {
- // We found main. Ignore everything below it, and stop
- // looking. redo=False.
- n_ips = i+1;
- removed_below_main = True;
- break;
- } else if (VG_STREQ(buf, "(below main)")) {
- // We found "(below main)". Ignore everything below it,
- // but keep looking. redo=False.
- n_ips = i+1;
- removed_below_main = True;
- }
- }
- }
- }
+ // If it's a non-custom block, we will always remove the first stack
+ // trace entry (which will be one of malloc, __builtin_new, etc).
+ n_alloc_fns_removed = ( is_custom_alloc ? 0 : 1 );
- // Filter out alloc fns.
- n_alloc_fns_removed = 0;
- for (i = 0; i < n_ips; i++) {
+ // Filter out alloc fns. If it's a non-custom block, we remove the
+ // first entry (which will be one of malloc, __builtin_new, etc)
+ // without looking at it, because VG_(get_fnname) is expensive (it
+ // involves calls to VG_(malloc)/VG_(free)).
+ for (i = n_alloc_fns_removed; i < n_ips; i++) {
if (VG_(get_fnname)(ips[i], buf, BUF_LEN)) {
- // If it's an alloc-fn, we ignore it.
if (is_alloc_fn(buf)) {
n_alloc_fns_removed++;
} else {
@@ -889,44 +861,23 @@
}
}
}
-
- // There must be at least one alloc function, unless the client used
- // MALLOCLIKE_BLOCK.
- if (!is_custom_alloc) {
- if (n_alloc_fns_removed <= 0) {
- // Hmm. Print out the stack trace before aborting.
- for (i = 0; i < n_ips; i++) {
- if (VG_(get_fnname)(ips[i], buf, BUF_LEN)) {
- VG_(message)(Vg_DebugMsg, "--> %s", buf);
- } else {
- VG_(message)(Vg_DebugMsg, "--> ???", buf);
- }
- }
- tl_assert2(0, "Didn't find any alloc functions in the stack trace");
- }
- }
-
- // Ignore the alloc fns; shuffle the rest down.
+ // Remove the alloc fns by shuffling the rest down over them.
n_ips -= n_alloc_fns_removed;
for (i = 0; i < n_ips; i++) {
ips[i] = ips[i + n_alloc_fns_removed];
}
- // Did we get enough IPs after filtering? If so, redo=False.
+ // If after filtering we have >= clo_depth IPs, redo=False
if (n_ips >= clo_depth) {
+ redo = False;
n_ips = clo_depth; // Ignore any IPs below --depth.
- enough_IPs_after_filtering = True;
}
- if (fewer_IPs_than_asked_for ||
- removed_below_main ||
- enough_IPs_after_filtering)
- {
- return n_ips;
- } else {
+ if (redo) {
n_XCon_redos++;
}
}
+ return n_ips;
}
// Gets an XCon and puts it in the tree. Returns the XCon's bottom-XPt.
@@ -1814,14 +1765,14 @@
// XXX: do the filename properly, eventually
static Char* massif_out_file = "massif.out";
-#define BUF_SIZE 1024
-Char buf[1024];
+#define FP_BUF_SIZE 1024
+Char FP_buf[FP_BUF_SIZE];
// XXX: implement f{,n}printf in m_libcprint.c eventually, and use it here.
// Then change Cachegrind to use it too.
#define FP(format, args...) ({ \
- VG_(snprintf)(buf, BUF_SIZE, format, ##args); \
- VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); \
+ VG_(snprintf)(FP_buf, FP_BUF_SIZE, format, ##args); \
+ VG_(write)(fd, (void*)FP_buf, VG_(strlen)(FP_buf)); \
})
// Nb: uses a static buffer, each call trashes the last string returned.
@@ -1858,6 +1809,20 @@
ip_desc =
"(heap allocation functions) malloc/new/new[], --alloc-fns, etc.";
} else {
+ // If it's main-or-below-main, we (if appropriate) ignore everything
+ // below it by pretending it has no children.
+ // XXX: get this properly. Also, don't hard-code "(below main)"
+ // here -- look at the "(below main)"/"__libc_start_main" mess
+ // (m_stacktrace.c and m_demangle.c).
+ // [Nb: Josef wants --show-below-main to work for his fn entry/exit
+ // tracing]
+ Bool should_hide_below_main = /*!VG_(clo_show_below_main)*/True;
+ if (should_hide_below_main &&
+ VG_(get_fnname)(sxpt->Sig.ip, ip_desc, BUF_LEN) &&
+ (VG_STREQ(ip_desc, "main") || VG_STREQ(ip_desc, "(below main)")))
+ {
+ sxpt->Sig.n_children = 0;
+ }
// XXX: why the -1?
ip_desc = VG_(describe_IP)(sxpt->Sig.ip-1, ip_desc, BUF_LEN);
}
Modified: branches/MASSIF2/massif/tests/culling1.stderr.exp
===================================================================
--- branches/MASSIF2/massif/tests/culling1.stderr.exp 2007-10-16 23:18:06 UTC (rev 7009)
+++ branches/MASSIF2/massif/tests/culling1.stderr.exp 2007-10-17 03:31:32 UTC (rev 7010)
@@ -424,12 +424,12 @@
Massif: heap frees: 0
Massif: stack allocs: 0
Massif: stack frees: 0
-Massif: XPts: 2
-Massif: top-XPts: 1 (50%)
-Massif: XPt-init-expansions: 1
+Massif: XPts: 4
+Massif: top-XPts: 1 (25%)
+Massif: XPt-init-expansions: 3
Massif: XPt-later-expansions: 0
-Massif: SXPt allocs: 30
-Massif: SXPt frees: 18
+Massif: SXPt allocs: 60
+Massif: SXPt frees: 36
Massif: skipped snapshots: 51
Massif: real snapshots: 150
Massif: detailed snapshots: 15
Modified: branches/MASSIF2/massif/tests/culling2.stderr.exp
===================================================================
--- branches/MASSIF2/massif/tests/culling2.stderr.exp 2007-10-16 23:18:06 UTC (rev 7009)
+++ branches/MASSIF2/massif/tests/culling2.stderr.exp 2007-10-17 03:31:32 UTC (rev 7010)
@@ -527,12 +527,12 @@
Massif: heap frees: 0
Massif: stack allocs: 0
Massif: stack frees: 0
-Massif: XPts: 2
-Massif: top-XPts: 1 (50%)
-Massif: XPt-init-expansions: 1
+Massif: XPts: 4
+Massif: top-XPts: 1 (25%)
+Massif: XPt-init-expansions: 3
Massif: XPt-later-expansions: 0
-Massif: SXPt allocs: 40
-Massif: SXPt frees: 38
+Massif: SXPt allocs: 80
+Massif: SXPt frees: 76
Massif: skipped snapshots: 1
Massif: real snapshots: 200
Massif: detailed snapshots: 20
Modified: branches/MASSIF2/massif/tests/deep-B.stderr.exp
===================================================================
--- branches/MASSIF2/massif/tests/deep-B.stderr.exp 2007-10-16 23:18:06 UTC (rev 7009)
+++ branches/MASSIF2/massif/tests/deep-B.stderr.exp 2007-10-17 03:31:32 UTC (rev 7010)
@@ -36,11 +36,11 @@
Massif: heap frees: 0
Massif: stack allocs: 0
Massif: stack frees: 0
-Massif: XPts: 7
-Massif: top-XPts: 1 (14%)
-Massif: XPt-init-expansions: 6
+Massif: XPts: 9
+Massif: top-XPts: 1 (11%)
+Massif: XPt-init-expansions: 8
Massif: XPt-later-expansions: 0
-Massif: SXPt allocs: 7
+Massif: SXPt allocs: 9
Massif: SXPt frees: 0
Massif: skipped snapshots: 0
Massif: real snapshots: 11
Modified: branches/MASSIF2/massif/tests/deep-C.stderr.exp
===================================================================
--- branches/MASSIF2/massif/tests/deep-C.stderr.exp 2007-10-16 23:18:06 UTC (rev 7009)
+++ branches/MASSIF2/massif/tests/deep-C.stderr.exp 2007-10-17 03:31:32 UTC (rev 7010)
@@ -39,11 +39,11 @@
Massif: heap frees: 0
Massif: stack allocs: 0
Massif: stack frees: 0
-Massif: XPts: 4
-Massif: top-XPts: 1 (25%)
-Massif: XPt-init-expansions: 3
+Massif: XPts: 6
+Massif: top-XPts: 1 (16%)
+Massif: XPt-init-expansions: 5
Massif: XPt-later-expansions: 0
-Massif: SXPt allocs: 4
+Massif: SXPt allocs: 6
Massif: SXPt frees: 0
Massif: skipped snapshots: 0
Massif: real snapshots: 11
Modified: branches/MASSIF2/massif/tests/deep-D.post.exp
===================================================================
--- branches/MASSIF2/massif/tests/deep-D.post.exp 2007-10-16 23:18:06 UTC (rev 7009)
+++ branches/MASSIF2/massif/tests/deep-D.post.exp 2007-10-17 03:31:32 UTC (rev 7010)
@@ -45,7 +45,8 @@
8 864 864 800 64 0
9 972 972 900 72 0
92.59% (900B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
-
+->92.59% (900B) 0x26ED5E: (below main) (in /lib/libc-2.3.5.so)
+
--------------------------------------------------------------------------------
n time(B) total(B) useful-heap(B) admin-heap(B) stacks(B)
--------------------------------------------------------------------------------
Modified: branches/MASSIF2/massif/tests/deep.c
===================================================================
--- branches/MASSIF2/massif/tests/deep.c 2007-10-16 23:18:06 UTC (rev 7009)
+++ branches/MASSIF2/massif/tests/deep.c 2007-10-17 03:31:32 UTC (rev 7010)
@@ -8,8 +8,8 @@
// - In deep-C.vgtest, we have --alloc-fn=a3..a12, which means that get_XCon
// ends up with an empty stack trace after removing all the alloc-fns.
// It then redoes it.
-// - In deep-D.vgtest, we have --alloc-fn=main..a12, which means we have an empty
-// stack trace. That's ok.
+// - In deep-D.vgtest, we have --alloc-fn=main..a12, which means we have a
+// stack trace with a single "(below main)" entry.
#include <stdlib.h>
Modified: branches/MASSIF2/massif/tests/realloc.stderr.exp
===================================================================
--- branches/MASSIF2/massif/tests/realloc.stderr.exp 2007-10-16 23:18:06 UTC (rev 7009)
+++ branches/MASSIF2/massif/tests/realloc.stderr.exp 2007-10-17 03:31:32 UTC (rev 7010)
@@ -26,11 +26,11 @@
Massif: heap frees: 1
Massif: stack allocs: 0
Massif: stack frees: 0
-Massif: XPts: 5
-Massif: top-XPts: 4 (80%)
-Massif: XPt-init-expansions: 1
+Massif: XPts: 13
+Massif: top-XPts: 4 (30%)
+Massif: XPt-init-expansions: 9
Massif: XPt-later-expansions: 0
-Massif: SXPt allocs: 8
+Massif: SXPt allocs: 20
Massif: SXPt frees: 0
Massif: skipped snapshots: 0
Massif: real snapshots: 8
|