You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
1
(14) |
2
(8) |
3
(7) |
|
4
(7) |
5
(7) |
6
(6) |
7
(11) |
8
(10) |
9
(14) |
10
(10) |
|
11
(13) |
12
(15) |
13
(6) |
14
(8) |
15
(6) |
16
(6) |
17
(6) |
|
18
(6) |
19
(11) |
20
(15) |
21
(14) |
22
(11) |
23
(7) |
24
(17) |
|
25
(14) |
26
(28) |
27
(21) |
28
(23) |
29
(21) |
30
(17) |
31
(8) |
|
From: <sv...@va...> - 2007-03-28 23:26:22
|
Author: njn
Date: 2007-03-29 00:26:18 +0100 (Thu, 29 Mar 2007)
New Revision: 6684
Log:
- Fix bugs. Sanity checking complex data structures is a good idea.
- Added --threshold option, to control how much of the trees you see.
- (Temporary): showing the final global XTree at the end.
Modified:
branches/MASSIF2/massif/ms_main.c
Modified: branches/MASSIF2/massif/ms_main.c
===================================================================
--- branches/MASSIF2/massif/ms_main.c 2007-03-28 13:17:18 UTC (rev 6683)
+++ branches/MASSIF2/massif/ms_main.c 2007-03-28 23:26:18 UTC (rev 6684)
@@ -32,6 +32,7 @@
// XXX:
//---------------------------------------------------------------------------
// Next:
+// - print percentages/sizes in "the rest" entries
// - truncate really long file names [hmm, could make getting the name of
// alloc-fns more difficult]
// - Check MALLOCLIKE_BLOCK works, write regtest
@@ -84,6 +85,7 @@
// - there's a gap between the ms timer initialisation during Valgrind
// start-up and our first use of it. Could normalise versus our first
// use...
+// - could conceivably remove XPts that have their szB reduced to zero.
//
// Docs:
// - need to explain that --alloc-fn changed slightly -- now if an entry
@@ -292,8 +294,10 @@
// This isn't exactly right, because we actually drop (N/2)-1 when halving,
// but it shows the basic idea.
+// XXX: if the program is really short, we may get no detailed snapshots...
+// that's bad, do something about it.
#define MAX_N_SNAPSHOTS 50 // Keep it even, for simplicity
-#define DETAILED_SNAPSHOT_FREQ 10 // Every Nth snapshot will be detailed
+#define DETAILED_SNAPSHOT_FREQ 2 // Every Nth snapshot will be detailed
typedef
struct {
@@ -355,6 +359,8 @@
#define FILENAME_LEN 256
+#define P VG_(printf)
+
#define SPRINTF(zz_buf, fmt, args...) \
do { Int len = VG_(sprintf)(zz_buf, fmt, ## args); \
VG_(write)(fd, (void*)zz_buf, len); \
@@ -409,15 +415,18 @@
static UInt clo_heap_admin = 8;
static Bool clo_stacks = True;
static Bool clo_depth = 8;
+static UInt clo_threshold = 100; // 100 == 1%
static Bool ms_process_cmd_line_option(Char* arg)
{
VG_BOOL_CLO(arg, "--heap", clo_heap)
else VG_BOOL_CLO(arg, "--stacks", clo_stacks)
- else VG_NUM_CLO (arg, "--heap-admin", clo_heap_admin)
- else VG_BNUM_CLO(arg, "--depth", clo_depth, 1, MAX_DEPTH)
+ else VG_NUM_CLO (arg, "--heap-admin", clo_heap_admin)
+ else VG_BNUM_CLO(arg, "--depth", clo_depth, 1, MAX_DEPTH)
+ else VG_NUM_CLO(arg, "--threshold", clo_threshold)
+
else if (VG_CLO_STREQN(11, arg, "--alloc-fn=")) {
int i;
@@ -450,6 +459,9 @@
" --stacks=no|yes profile stack(s) [yes]\n"
" --depth=<number> depth of contexts [8]\n"
" --alloc-fn=<name> specify <fn> as an alloc function [empty]\n"
+" --threshold=<n> significance threshold, in 100ths of a percent\n"
+" (eg. <n>=100 shows nodes covering >= 1%% of\n"
+" total size, <n>=0 shows all nodes) [100]\n"
);
VG_(replacement_malloc_print_usage)();
}
@@ -740,6 +752,61 @@
/*------------------------------------------------------------*/
+/*--- Sanity checking ---*/
+/*------------------------------------------------------------*/
+
+__attribute__((unused))
+static void pp_XPt(XPt* xpt)
+{
+ Int i;
+ P("XPt (%p):\n", xpt);
+ P("- ip: : %p\n", (void*)xpt->ip);
+ P("- curr_szB : %ld\n", xpt->curr_szB);
+ P("- parent : %p\n", xpt->parent);
+ P("- n_children : %d\n", xpt->n_children);
+ P("- max_children: %d\n", xpt->max_children);
+ for (i = 0; i < xpt->n_children; i++) {
+ P("- children[%2d]: %p\n", i, xpt->children[i]);
+ }
+}
+
+static void sanity_check_XTree(XPt* xpt, XPt* parent)
+{
+ Int i;
+ SizeT children_sum_szB = 0;
+
+ if (NULL == xpt)
+ return;
+
+ // Check back-pointer.
+ tl_assert2(xpt->parent == parent,
+ "xpt->parent = %p, parent = %p\n", xpt->parent, parent);
+
+ // Check children counts look sane.
+ tl_assert(xpt->n_children <= xpt->max_children);
+
+ // Check the sum of any children szBs equals the XPt's szB.
+ if (xpt->n_children > 0) {
+ for (i = 0; i < xpt->n_children; i++) {
+ children_sum_szB += xpt->children[i]->curr_szB;
+ }
+ tl_assert(children_sum_szB == xpt->curr_szB);
+ }
+
+ // Check each child.
+ for (i = 0; i < xpt->n_children; i++) {
+ sanity_check_XTree(xpt->children[i], xpt);
+ }
+}
+
+static void sanity_check_snapshot(Snapshot* snapshot)
+{
+ tl_assert(snapshot->total_szB ==
+ snapshot->heap_admin_szB + snapshot->heap_szB + snapshot->stacks_szB);
+ sanity_check_XTree(snapshot->alloc_xpt, /*parent*/NULL);
+}
+
+/*------------------------------------------------------------*/
/*--- Taking a snapshot ---*/
/*------------------------------------------------------------*/
@@ -823,18 +890,18 @@
// XXX: taking a full snapshot... could/should just snapshot the significant
// parts. Nb: then the amounts wouldn't add up, unless I represented the
// "other insignificant places" in XPts.
-static XPt* dup_XTree(XPt* xpt)
+static XPt* dup_XTree(XPt* xpt, XPt* parent)
{
Int i;
XPt* dup_xpt = VG_(malloc)(sizeof(XPt));
dup_xpt->ip = xpt->ip;
dup_xpt->curr_szB = xpt->curr_szB;
- dup_xpt->parent = xpt->parent;
+ dup_xpt->parent = parent; // Nb: not xpt->children!
dup_xpt->n_children = xpt->n_children;
dup_xpt->max_children = xpt->n_children; // Nb: don't copy max_children!
dup_xpt->children = VG_(malloc)(dup_xpt->max_children * sizeof(XPt*));
for (i = 0; i < xpt->n_children; i++) {
- dup_xpt->children[i] = dup_XTree(xpt->children[i]);
+ dup_xpt->children[i] = dup_XTree(xpt->children[i], dup_xpt);
}
n_dupd_xpts++;
@@ -885,7 +952,8 @@
snapshot->heap_szB = heap_szB;
// Take a detailed snapshot if it's been long enough since the last one.
if (DETAILED_SNAPSHOT_FREQ == n_snapshots_since_last_detailed) {
- snapshot->alloc_xpt = dup_XTree(alloc_xpt);
+ snapshot->alloc_xpt = dup_XTree(alloc_xpt, /*parent*/NULL);
+ tl_assert(snapshot->alloc_xpt->curr_szB == heap_szB);
n_snapshots_since_last_detailed = 0;
} else {
n_snapshots_since_last_detailed++;
@@ -913,6 +981,9 @@
snapshot->total_szB =
snapshot->heap_szB + snapshot->heap_admin_szB + snapshot->stacks_szB;
+ // Sanity-check it.
+ sanity_check_snapshot(snapshot);
+
// Update peak data -------------------------------------------------
// XXX: this is not really the right way to do peak data -- it's only
// peak snapshot data, the true peak could be between snapshots.
@@ -963,29 +1034,29 @@
}
static
-void* new_block ( ThreadId tid, void* p, SizeT size, SizeT align,
+void* new_block ( ThreadId tid, void* p, SizeT szB, SizeT alignB,
Bool is_zeroed )
{
HP_Chunk* hc;
Bool custom_alloc = (NULL == p);
- if (size < 0) return NULL;
+ if (szB < 0) return NULL;
// Update statistics
n_allocs++;
- if (0 == size) n_zero_allocs++;
+ if (0 == szB) n_zero_allocs++;
// Allocate and zero if necessary
if (!p) {
- p = VG_(cli_malloc)( align, size );
+ p = VG_(cli_malloc)( alignB, szB );
if (!p) {
return NULL;
}
- if (is_zeroed) VG_(memset)(p, 0, size);
+ if (is_zeroed) VG_(memset)(p, 0, szB);
}
// Make new HP_Chunk node, add to malloc_list
hc = VG_(malloc)(sizeof(HP_Chunk));
- hc->szB = size;
+ hc->szB = szB;
hc->data = (Addr)p;
hc->where = NULL; // paranoia
@@ -995,7 +1066,7 @@
// Update XTree, if necessary
if (clo_heap) {
hc->where = get_XCon( tid, custom_alloc );
- update_XCon(hc->where, size);
+ update_XCon(hc->where, szB);
}
VG_(HT_add_node)(malloc_list, hc);
@@ -1098,29 +1169,29 @@
/*--- malloc() et al replacement wrappers ---*/
/*------------------------------------------------------------*/
-static void* ms_malloc ( ThreadId tid, SizeT n )
+static void* ms_malloc ( ThreadId tid, SizeT szB )
{
- return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
+ return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
-static void* ms___builtin_new ( ThreadId tid, SizeT n )
+static void* ms___builtin_new ( ThreadId tid, SizeT szB )
{
- return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
+ return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
-static void* ms___builtin_vec_new ( ThreadId tid, SizeT n )
+static void* ms___builtin_vec_new ( ThreadId tid, SizeT szB )
{
- return new_block( tid, NULL, n, VG_(clo_alignment), /*is_zeroed*/False );
+ return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
-static void* ms_calloc ( ThreadId tid, SizeT m, SizeT size )
+static void* ms_calloc ( ThreadId tid, SizeT m, SizeT szB )
{
- return new_block( tid, NULL, m*size, VG_(clo_alignment), /*is_zeroed*/True );
+ return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
}
-static void *ms_memalign ( ThreadId tid, SizeT align, SizeT n )
+static void *ms_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
{
- return new_block( tid, NULL, n, align, False );
+ return new_block( tid, NULL, szB, alignB, False );
}
static void ms_free ( ThreadId tid, void* p )
@@ -1138,9 +1209,12 @@
die_block( p, /*custom_free*/False );
}
-static void* ms_realloc ( ThreadId tid, void* p_old, SizeT new_size )
+static void* ms_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
{
- return renew_block(tid, p_old, new_size);
+// return renew_block(tid, p_old, new_size);
+ die_block( p_old, /*custom_free*/False );
+ return new_block( tid, NULL, new_szB, VG_(clo_alignment),
+ /*is_zeroed*/False );
}
@@ -1168,10 +1242,11 @@
switch (argv[0]) {
case VG_USERREQ__MALLOCLIKE_BLOCK: {
void* res;
- void* p = (void*)argv[1];
- SizeT sizeB = argv[2];
- *ret = 0;
- res = new_block( tid, p, sizeB, /*align--ignored*/0, /*is_zeroed*/False );
+ void* p = (void*)argv[1];
+ SizeT szB = argv[2];
+ *ret = 0;
+ res =
+ new_block( tid, p, szB, /*alignB--ignored*/0, /*is_zeroed*/False );
tl_assert(res == p);
return True;
}
@@ -1232,8 +1307,6 @@
}
#endif
-#define P VG_(printf)
-
static void write_text_graph(void)
{
Int i;
@@ -1373,6 +1446,8 @@
static Char* make_perc(ULong x, ULong y)
{
static Char mbuf[32];
+
+// tl_assert(x <= y); XXX; put back in later...
// XXX: I'm not confident that VG_(percentify) works as it should...
VG_(percentify)(x, y, 1, 5, mbuf);
@@ -1386,7 +1461,12 @@
// XXX: make command-line controllable?
static Bool is_significant_XPt(XPt* xpt, SizeT curr_total_szB)
{
- return (xpt->curr_szB * 1000 / curr_total_szB >= 10); // < 1%?
+ // clo_threshold is measured in hundredths of a percent of total size,
+ // ie. 10,000ths of total size. So clo_threshold=100 means that the
+ // threshold is 1% of total size.
+ tl_assert(xpt->curr_szB <= curr_total_szB);
+ // XXX: overflow danger here...
+ return (xpt->curr_szB * 10000 / curr_total_szB >= clo_threshold);
}
static void pp_snapshot_child_XPts(XPt* parent, Int depth, Char* depth_str,
@@ -1404,6 +1484,7 @@
children_sum_szB += parent->children[i]->curr_szB;
}
tl_assert(children_sum_szB == parent->curr_szB);
+// VG_(printf)("szB = %,ld B\n", children_sum_szB);
// Sort children by curr_szB (reverse order: biggest to smallest)
// XXX: is it better to keep them always in order?
@@ -1424,7 +1505,7 @@
// This child is significant. Print it.
perc = make_perc(child->curr_szB, curr_total_szB);
ip_desc = VG_(describe_IP)(child->ip-1, buf2, BUF_LEN);
- P("->%6s: %s\n", perc, ip_desc);
+ P("->%6s(%,ldB): %s\n", perc, child->curr_szB, ip_desc);
// If the child has any children, print them. But first add the
// prefix for them, which is " " if the parent has no smaller
@@ -1472,6 +1553,8 @@
Int depth_str_len = clo_depth * 2 + 2;
Char* depth_str = VG_(malloc)(sizeof(Char) * depth_str_len);
depth_str[0] = '\0'; // Initialise to "".
+
+ sanity_check_snapshot(snapshot);
P("=================================\n");
P("== snapshot %d\n", snapshot_n);
@@ -1491,11 +1574,12 @@
P("(No heap memory currently allocated)\n");
} else {
P("Heap tree:\n");
- P("%6s: (heap allocation functions) malloc/new/new[],"
- " --alloc-fn functions, etc.\n",
- make_perc(snapshot->heap_szB, snapshot->total_szB));
+ P("%6s(%,ldB): (heap allocation functions) malloc/new/new[],"
+ " --alloc-fns, etc.\n",
+ make_perc(snapshot->heap_szB, snapshot->total_szB),
+ snapshot->heap_szB);
- pp_snapshot_child_XPts(alloc_xpt, 0, depth_str, depth_str_len,
+ pp_snapshot_child_XPts(snapshot->alloc_xpt, 0, depth_str, depth_str_len,
snapshot->heap_szB, snapshot->total_szB);
}
@@ -1505,12 +1589,26 @@
static void write_snapshots(void)
{
Int i;
+
+ // XXX: temporary
+ Int depth_str_len = clo_depth * 2 + 2;
+ Char* depth_str = VG_(malloc)(sizeof(Char) * depth_str_len);
+ depth_str[0] = '\0'; // Initialise to "".
+
for (i = 0; i < curr_snapshot; i++) {
Snapshot* snapshot = & snapshots[i];
if (snapshot->alloc_xpt) {
pp_snapshot(snapshot, i); // Detailed snapshot!
}
}
+ P("=================================\n");
+ P("== main XTree\n");
+ P("=================================\n");
+ P("heap szB: %ld\n", heap_szB);
+ pp_snapshot_child_XPts(alloc_xpt, 0, depth_str, depth_str_len,
+ heap_szB, 999999);
+
+ VG_(free)(depth_str);
}
/*------------------------------------------------------------*/
|
|
From: Nicholas N. <nj...@cs...> - 2007-03-28 22:04:49
|
On Wed, 28 Mar 2007, Vince Weaver wrote: > I've just released the simpoint tool: > > http://www.csl.cornell.edu/~vince/projects/valsim/ > I'm working right now on writing up all the results of the validation > effort. Nice. I'd be interested to see the paper when it's done. You do things at the superblock level rather than the basic block level because Valgrind uses superblocks. I think if you use --vex-guest-chase-thresh=0 it will not chase across branches and thus will effectively do things at a basic block level. At least, that's what the comments for VexControl in VEX/pub/libvex.h seem to indicate -- Julian, is that right? (You could write another paper comparing accuracy of BBs vs SBs in SimPoint tools ;) You might then want to increase --vex-guest-max-insns to allow BBs to be as big as possible (at least bigger than 50, which is the default maximum). Nick |
|
From: Nicholas N. <nj...@cs...> - 2007-03-28 21:54:16
|
On Wed, 28 Mar 2007, Josef Weidendorfer wrote: > On Wednesday 28 March 2007, you wrote: >> Welcome. If you get any speedups as a result of these tricks I'd >> be interested to hear about them, because the main valgrind dispatcher >> (m_dispatch) suffers from exactly the same problem. > > I thought that SB chaining reduces the problem here, does it? It would, if Valgrind did it :) It used to (in the UCode days), but it hasn't been implemented for Vex yet. Vex does chase across some unconditional branches, though. Nick |
|
From: Nicholas N. <nj...@cs...> - 2007-03-28 21:48:30
|
On Thu, 29 Mar 2007, Yao Qi wrote: > I am playing with helgrind on data race detection, and want to implement > my data race detection in the framework of helgrind. > > I find that hg_thread_create and hg_thread_join is "hooked" to > VG_(track_post_thread_create) and VG_(track_post_thread_join) > respectively. One point confused me is how VG_(track_post_thread_join) > is called? I could find the location invoke > VG_(track_post_thread_create), but I could not find any thing call this > function. I set breakpoint on hg_thread_join in gdb, and the helgrind > does not hit this breakpoint either. > > Am I missing something? Thanks. These functions are called via functions pointers. The names depend on which version you're using, but look at the code within VG_(track_post_thread_create) to see where the function pointer is being assigned. Also beware that the point where the function pointer is called may involve macros, and so the names might not match up exactly. Nick |
|
From: Josef W. <Jos...@gm...> - 2007-03-28 20:11:40
|
On Wednesday 28 March 2007, Josef Weidendorfer wrote: > Depends. Here some preliminary results on a Core2, 2.6 Gz. > With client code "perf/tinycc -c perf/test_input_for_tinycc.c", > which gives D1 miss rate of 0.2%, and L2 miss rate of 0.0% > (do we have a test which gives low hit rate?). > > 32 bit version VG+client (878 millions client instructions) > Original Cachegrind (3.3.0.SVN): 24.2 s > Using buffer and producer/consumer > with event tag in message, 1 switch: 27.8 s (14% slowdown) > dito, multiple switches: 27.1 s (11% slowdown) > with handler function pointers in message > & one indirect call: 29.0 s (19% slowdown) > > Using multiple calls, ie. an indirect call at end of every > event handler has the problem that you get a huge call chain and > occupy much stack space. > I'm just trying to come up with a threaded version with computed > goto's. Update: With computed goto and multiple indirect jumps I get the 27.1 s figure as with the multiple switch statements. Actually, that probably was to be expected. However, this was a little nasty: the multiple "goto *next"'s were optimized into one indirect jump only. So I had to use "asm volatile ..." to force the multiple jumps. Josef |
|
From: Vince W. <vi...@cs...> - 2007-03-28 19:56:17
|
> Thanks! I see your tools at > http://www.csl.cornell.edu/~vince/software.html, are they publicly > available? I've just released the simpoint tool: http://www.csl.cornell.edu/~vince/projects/valsim/ I'm working right now on writing up all the results of the validation effort. I will release the other two tools eventually; unfortunately I am a bit swamped for time right now, trying to hit some paper deadlines. The valsim tool generates an x86 instruction stream for TAXI which is an x86 simulator that can't decode instructions itself. This is of limited use because I think the TAXI developer has taken the sources for it off of his website. The cache tool was part of a project I helped an MEng student do for their research project. It just dumps raw memory and instruction access traces to a pipe, and we had a cache-simulator read from the pipe in a different process and did some cache analysis. Vince |
|
From: Josef W. <Jos...@gm...> - 2007-03-28 17:21:16
|
On Wednesday 28 March 2007, you wrote: > Welcome. If you get any speedups as a result of these tricks I'd > be interested to hear about them, because the main valgrind dispatcher > (m_dispatch) suffers from exactly the same problem. I thought that SB chaining reduces the problem here, does it? Or is this specifically about indirect jumps in the client code? It really would be nice if there was a way to directly influence the branch predictor (eg. write an entry into the BTB). This would solve the problem at least for me, as I can look up the next target for the indirect jump way before it is executed. Recently I read a paper about the IBM Cell, and for the SPUs (quite simplistic SIMD cores), you have to specify the possible branch target yourself to make the branch "predictor" work. Exactly what would be needed here ;-) Josef > > J > |
|
From: Josef W. <Jos...@gm...> - 2007-03-28 17:07:07
|
On Tuesday 27 March 2007, Nicholas Nethercote wrote: > > Maybe a dirty helper should specify a default value for the temporary if > > the call is not taken. That would make it safe. If the backend can prove > > the call is always taken then it can omit computation/assignment of the > > default and so avoid performance loss in the common cases. Hmm, this is > > all a bit messy. Need to think about it more. > > Or you could just disallow return values in conditional calls? That would > be fine with all the current uses, and with Josef's proposed usage too, if I > understand correctly. My instrumentation loads the buffer's write pointer into a temporary reg at beginning of a SB. This has to be done after the helper call, as that one can change the write pointer. However, I already need the write pointer before for calculation of the guard boolean, and the second load should not be needed. So a default return value when there is no helper call done, indeed would be nice. Josef > More generally, how do you represent a conditional move in SSA? I don't > think you can, conditional moves don't really make sense when you don't have > specific locations (eg. registers) for holding values. > > N > |
|
From: Josef W. <Jos...@gm...> - 2007-03-28 16:55:17
|
On Tuesday 27 March 2007, you wrote: > On Tue, 27 Mar 2007, Josef Weidendorfer wrote: > > > recently I was playing with the idea to speed up cache simulation > > by using 2 cores in these (not that new) dual-core processors. > > > > The idea is to run the cache simulation in another thread (*1*), > > feeded by memory access events written into a buffer by > > cachegrind generated as usual. > > There's a paper "SuperPin: Parallelizing Dynamic Instrumentation for > Real-Time Performance" by Wallace and Hazelwood that discusses this topic. Oh, thanks. > So what was the overall slow-down of this producer/consumer version vs > original? Depends. Here some preliminary results on a Core2, 2.6 Gz. With client code "perf/tinycc -c perf/test_input_for_tinycc.c", which gives D1 miss rate of 0.2%, and L2 miss rate of 0.0% (do we have a test which gives low hit rate?). 32 bit version VG+client (878 millions client instructions) Original Cachegrind (3.3.0.SVN): 24.2 s Using buffer and producer/consumer with event tag in message, 1 switch: 27.8 s (14% slowdown) dito, multiple switches: 27.1 s (11% slowdown) with handler function pointers in message & one indirect call: 29.0 s (19% slowdown) Using multiple calls, ie. an indirect call at end of every event handler has the problem that you get a huge call chain and occupy much stack space. I'm just trying to come up with a threaded version with computed goto's. However, this version is very dumb, and puts a message into the buffer for every original log_* function of the current version of Cachegrind, inclusive all the parameters and also with some header overhead. It should be possible to make one message per SB, and putting e.g. the data_size back to InstrInfo, thus reducing the message size. BTW, the 64bit version is even slower because of the large messages. 
> I tried the same thing a few years ago, and found that the number of > instructions was greatly reduced (by roughly 25% IIRC) but that the increase > in branch mispredictions neutralized that and speed was similar to the > original, and the producer/consumer version was more complex. The separation of producer and consumer theoretically has the option to split them either into separate threads or processes. The latter would free the development of online postprocessing from Valgrind constraints such as C/assembler only and limited runtime library (no pthreads), even if you do not think about parallelization of the cache simulation. Where did you get the instruction reduction in your version? How were the messages built up? > > Perhaps this idea is crazy, and in the end not worthwhile. > > For the visioned parallelization to be useful, the communication > > overhead to the other core needs to be really low, which > > means to minimize MESI transactions. > It, like a lot of parallel programming tasks, sounds difficult and fragile. Yes, however, a simple producer/consumer scheme should not be very complicated. Why do you think this could be fragile? However, for sure it is not easy to really get speedups. First, there is the problem of the pure communication overhead. Second, you have the load balancing issue. Josef |
|
From: Yao Qi <qiy...@gm...> - 2007-03-28 16:26:14
|
I am playing with helgrind on data race detection, and want to implement my data race detection in the framework of helgrind. I find that hg_thread_create and hg_thread_join is "hooked" to VG_(track_post_thread_create) and VG_(track_post_thread_join) respectively. One point confused me is how VG_(track_post_thread_join) is called? I could find the location invoke VG_(track_post_thread_create), but I could not find any thing call this function. I set breakpoint on hg_thread_join in gdb, and the helgrind does not hit this breakpoint either. Am I missing something? Thanks. -- Yao Qi <qiy...@gm...> GNU/Linux Developer http://duewayqi.googlepages.com/ Think twice before speaking, but don't say "think think click click". |
|
From: <sv...@va...> - 2007-03-28 13:17:21
|
Author: njn
Date: 2007-03-28 14:17:18 +0100 (Wed, 28 Mar 2007)
New Revision: 6683
Log:
- main change: now not printing snapshots as they're taken. Instead
copying the XTree at certain ("detailed") snapshots, and then printing
them at the end, after the graph. This is less space-efficient than it
could be.
- printing a legend for the graph.
- added more notes, redid some comments.
- moved some code around
- It's borderline useful now. Adding the peak would make it better.
Modified:
branches/MASSIF2/massif/ms_main.c
Modified: branches/MASSIF2/massif/ms_main.c
===================================================================
--- branches/MASSIF2/massif/ms_main.c 2007-03-28 12:16:55 UTC (rev 6682)
+++ branches/MASSIF2/massif/ms_main.c 2007-03-28 13:17:18 UTC (rev 6683)
@@ -69,10 +69,6 @@
// 143062 cra massif crashes on app exit with signal 8 SIGFPE
// - occurs with no allocations -- ensure that case works
//
-// Work out when to take periodic snapshots.
-// - If I separate content from presentation I don't have to thin out the
-// old ones (but not doing so takes space...)
-//
// Work out how to take the peak.
// - exact peak, or within a certain percentage?
// - include the stack? makes it harder
@@ -83,6 +79,12 @@
// - "show me the extra allocations from last-snapshot"
// - "start/stop logging" (eg. quickly skip boring bits)
//
+// Other:
+// - am I recording asked-for sizes or actual rounded-up sizes?
+// - there's a gap between the ms timer initialisation during Valgrind
+// start-up and our first use of it. Could normalise versus our first
+// use...
+//
// Docs:
// - need to explain that --alloc-fn changed slightly -- now if an entry
// matches an alloc-fn, that entry *and all above it* are removed. So you
@@ -134,23 +136,36 @@
/*--- Overview of operation ---*/
/*------------------------------------------------------------*/
-// Heap blocks are tracked, and the amount of space allocated by various
-// contexts (ie. lines of code, more or less) is also tracked.
-// "Snapshots", ie. detailed recordings of the memory usage, are taken every
-// so often. There are two kinds of snapshot:
+// The size of the stacks and heap is tracked. The heap is tracked in a lot
+// of detail, enough to tell how many bytes each line of code is responsible
+// for, more or less.
+//
+// "Snapshots" are recordings of the memory usage. There are two basic
+// kinds:
+// - Normal: these record the current time, total memory size, total heap
+// size, heap admin size and stack size.
+// - Detailed: these record those things in a normal snapshot, plus a very
+// detailed XTree (see below) indicating how the heap is structured.
+//
+// Snapshots are taken every so often. There are two storage classes of
+// snapshots:
// - Temporary: Massif does a temporary snapshot every so often. The idea
// is to always have a certain number of temporary snapshots around. So
// we take them frequently to begin with, but decreasingly often as the
// program continues to run. Also, we remove some old ones after a while.
-// Overall it's a kind of exponential decay thing.
-// - Permanent: Massif takes a permanent snapshot in some circumstances.
-// They are:
+// Overall it's a kind of exponential decay thing. Most of these are
+// normal snapshots, a small fraction are detailed snapshots.
+// - Permanent: Massif takes a permanent (detailed) snapshot in some
+// circumstances. They are:
// - Peak snapshot: When the memory usage peak is reached, it takes a
// snapshot. It keeps this, unless the peak is subsequently exceeded,
// in which case it will overwrite the peak snapshot.
// - User-requested snapshots: These are done in response to client
// requests. They are always kept.
+//
+// The summary output produced by Massif could include a graph like this.
//
+//---------------------------------------------------------------------------
// 100M|B . :A
// | .::: :::#
// | :::::. c:::#:
@@ -176,15 +191,19 @@
//
// Temporary snapshots:
// a: periodic snapshot, total size: 33,000,000 bytes
-// b: periodic snapshot, total size: ... bytes
-// c: periodic snapshot, total size: ... bytes
-// d: periodic snapshot, total size: ... bytes
-// e: periodic snapshot, total size: ... bytes
-// f: periodic snapshot, total size: ... bytes
-// g: periodic snapshot, total size: ... bytes
-// h: periodic snapshot, total size: ... bytes
+// b: periodic snapshot, total size: 82,000,000 bytes
+// c: periodic snapshot, total size: 90,000,000 bytes
+// d: periodic snapshot, total size: 64,000,000 bytes
+// e: periodic snapshot, total size: 34,000,000 bytes
+// f: periodic snapshot, total size: 24,000,000 bytes
+// g: periodic snapshot, total size: 39,000,000 bytes
+// h: periodic snapshot, total size: 33,000,000 bytes
//
-// Explanation of y-axis:
+// Permanent snapshots:
+// A: peak snapshot, total size: 100,000,000 bytes
+//---------------------------------------------------------------------------
+//
+// Explanation of the y-axis:
// - Top of the x-axis box represents 0.
//
// 4M^| .: This row has base=2M, half-threshold=3M, full-threshold=4M
@@ -207,28 +226,13 @@
// - etc.
-// Periodically, a census is taken, and the amount of space used, at that
-// point, by the most significant (highly allocating) contexts is recorded.
-// Census start off frequently, but are scaled back as the program goes on,
-// so that there are always a good number of them. At the end, overall
-// spacetimes for different contexts (of differing levels of precision) is
-// calculated, the graph is printed, and the text giving spacetimes for the
-// increasingly precise contexts is given.
-//
-// Measures the following:
-// - heap blocks
-// - heap admin bytes
-// - stack(s)
-// - code (code segments loaded at startup, and loaded with mmap)
-// - data (data segments loaded at startup, and loaded/created with mmap,
-// and brk()d segments)
-
/*------------------------------------------------------------*/
/*--- Main types ---*/
/*------------------------------------------------------------*/
// An XPt represents an "execution point", ie. a code address. Each XPt is
-// part of a tree of XPts (an "execution tree", or "XTree").
+// part of a tree of XPts (an "execution tree", or "XTree"). The details of
+// the heap are represented by a single XTree.
//
// The root of the tree is 'alloc_xpt', which represents all allocation
// functions, eg:
@@ -276,42 +280,31 @@
XPt** children; // pointers to children XPts
};
-// Each census snapshots the most significant XTrees, each XTree having a
-// top-XPt as its root. The 'curr_szB' element for each XPt is recorded
-// in the snapshot. The snapshot contains all the XTree's XPts, not in a
-// tree structure, but flattened into an array. This flat snapshot is used
-// at the end for computing exact_ST_dbld for each XPt.
-//
-// Graph resolution, x-axis: no point having more than about 200 census
-// x-points; you can't see them on the graph. Therefore:
-//
-// - do a census every 1 ms for first 200 --> 200, all (200 ms)
-// - halve (drop half of them) --> 100, every 2nd (200 ms)
-// - do a census every 2 ms for next 200 --> 200, every 2nd (400 ms)
-// - halve --> 100, every 4th (400 ms)
-// - do a census every 4 ms for next 400 --> 200, every 4th (800 ms)
+// Snapshots are done so we keep a good number of them. If MAX_N_SNAPSHOTS
+// equals 200, then it works something like this:
+// - do a snapshot every 1ms for first 200ms --> 200, all (200 ms)
+// - halve (drop half of them) --> 100, every 2nd
+// - do a snapshot every 2ms for next 200ms --> 200, every 2nd (400 ms)
+// - halve --> 100, every 4th
+// - do a snapshot every 4ms for next 400ms --> 200, every 4th (800 ms)
// - etc.
//
// This isn't exactly right, because we actually drop (N/2)-1 when halving,
// but it shows the basic idea.
-#define MAX_N_CENSI 10 // Keep it even, for simplicity
+#define MAX_N_SNAPSHOTS 50 // Keep it even, for simplicity
+#define DETAILED_SNAPSHOT_FREQ 10 // Every Nth snapshot will be detailed
-// Graph resolution, y-axis: hp2ps only draws the 19 biggest (in space-time)
-// bands, rest get lumped into OTHERS. I only print the top N
-// (cumulative-so-far space-time) at each point. N should be a bit bigger
-// than 19 in case the cumulative space-time doesn't fit with the eventual
-// space-time computed by hp2ps (but it should be close if the samples are
-// evenly spread, since hp2ps does an approximate per-band space-time
-// calculation that just sums the totals; ie. it assumes all samples are
-// the same distance apart).
-
typedef
struct {
- Int ms_time; // Int: must allow -1
- SizeT total_szB; // Size of all allocations at that census time
- }
- Census;
+ Int ms_time; // Int: must allow -1.
+ SizeT total_szB; // Size of all allocations at that snapshot time.
+ SizeT heap_admin_szB;
+ SizeT heap_szB;
+ SizeT stacks_szB;
+ XPt* alloc_xpt; // Heap XTree root, if a detailed snapshot,
+ } // otherwise NULL
+ Snapshot;
// Metadata for heap blocks. Each one contains a pointer to a bottom-XPt,
// which is a foothold into the XCon at which it was allocated. From
@@ -343,6 +336,8 @@
// - 1,800 top-XPts
static UInt n_xpts = 0;
+static UInt n_dupd_xpts = 0;
+static UInt n_dupd_xpts_freed = 0;
static UInt n_allocs = 0;
static UInt n_zero_allocs = 0;
static UInt n_frees = 0;
@@ -351,8 +346,8 @@
static UInt n_getXCon_redo = 0;
static UInt n_halvings = 0;
-static UInt n_real_censi = 0;
-static UInt n_fake_censi = 0;
+static UInt n_real_snapshots = 0;
+static UInt n_fake_snapshots = 0;
/*------------------------------------------------------------*/
/*--- Globals ---*/
@@ -373,7 +368,7 @@
static SSizeT sigstacks_szB = 0; // Current signal stacks space sum
static SSizeT heap_szB = 0; // Live heap size
static SSizeT peak_heap_szB = 0;
-static SSizeT peak_census_total_szB = 0;
+static SSizeT peak_snapshot_total_szB = 0;
static VgHashTable malloc_list = NULL; // HP_Chunks
@@ -393,7 +388,7 @@
// operator new[](unsigned, std::nothrow_t const&)
// But someone might be interested in seeing them. If they're not, they can
// specify them with --alloc-fn.
-static UInt n_alloc_fns = 10;
+static UInt n_alloc_fns = 6;
static Char* alloc_fns[MAX_ALLOC_FNS] = {
"malloc",
"__builtin_new",
@@ -743,7 +738,216 @@
alloc_xpt->curr_szB += space_delta;
}
+
/*------------------------------------------------------------*/
+/*--- Taking a snapshot ---*/
+/*------------------------------------------------------------*/
+
+static Snapshot snapshots[MAX_N_SNAPSHOTS];
+static UInt curr_snapshot = 0; // Points to where next snapshot will go.
+
+static UInt ms_interval;
+
+// Weed out half the snapshots; we choose those that represent the smallest
+// time-spans, because that loses the least information.
+//
+// Algorithm for N snapshots: We find the snapshot representing the smallest
+// timeframe, and remove it. We repeat this until (N/2)-1 snapshots are gone.
+// (It's (N/2)-1 because we never remove the first and last snapshots.)
+// We have to do this one snapshot at a time, rather than finding the (N/2)-1
+// smallest snapshots in one hit, because when a snapshot is removed, its
+// neighbours immediately cover greater timespans. So it's N^2, but N is
+// small, and it's not done very often.
+static void halve_snapshots(void)
+{
+ Int i, jp, j, jn;
+ Snapshot* min_snapshot;
+
+ n_halvings++;
+ if (VG_(clo_verbosity) > 1)
+ VG_(message)(Vg_DebugMsg, "Halving snapshots...");
+
+ // Sets j to the index of the first not-yet-removed snapshot at or after i
+ #define FIND_SNAPSHOT(i, j) \
+ for (j = i; j < MAX_N_SNAPSHOTS && -1 == snapshots[j].ms_time; j++) { }
+
+ for (i = 2; i < MAX_N_SNAPSHOTS; i += 2) {
+ // Find the snapshot representing the smallest timespan. The timespan
+ // for snapshot n = d(N-1,N)+d(N,N+1), where d(A,B) is the time between
+ // snapshot A and B. We don't consider the first and last snapshots for
+ // removal.
+ Int min_span = 0x7fffffff;
+ Int min_j = 0;
+
+ // Initial triple: (prev, curr, next) == (jp, j, jn)
+ jp = 0;
+ FIND_SNAPSHOT(1, j);
+ FIND_SNAPSHOT(j+1, jn);
+ while (jn < MAX_N_SNAPSHOTS) {
+ Int timespan = snapshots[jn].ms_time - snapshots[jp].ms_time;
+ tl_assert(timespan >= 0);
+ if (timespan < min_span) {
+ min_span = timespan;
+ min_j = j;
+ }
+ // Move on to next triple
+ jp = j;
+ j = jn;
+ FIND_SNAPSHOT(jn+1, jn);
+ }
+ // We've found the least important snapshot, now remove it
+ min_snapshot = & snapshots[ min_j ];
+ min_snapshot->ms_time = -1;
+
+ // XXX: free XTree if present...
+ // free_XTree(min_snapshot->alloc_xpt)
+ }
+
+ // Slide down the remaining snapshots over the removed ones. The '<=' is
+ // because we are removing only (N/2)-1, rather than N/2.
+ for (i = 0, j = 0; i <= MAX_N_SNAPSHOTS / 2; i++, j++) {
+ FIND_SNAPSHOT(j, j);
+ if (i != j) {
+ snapshots[i] = snapshots[j];
+ }
+ }
+ curr_snapshot = i;
+
+ // Double intervals
+ ms_interval *= 2;
+
+ if (VG_(clo_verbosity) > 1)
+ VG_(message)(Vg_DebugMsg, "...done");
+}
+
+// XXX: taking a full snapshot... could/should just snapshot the significant
+// parts. Nb: then the amounts wouldn't add up, unless I represented the
+// "other insignificant places" in XPts.
+static XPt* dup_XTree(XPt* xpt)
+{
+ Int i;
+ XPt* dup_xpt = VG_(malloc)(sizeof(XPt));
+ dup_xpt->ip = xpt->ip;
+ dup_xpt->curr_szB = xpt->curr_szB;
+ dup_xpt->parent = xpt->parent;
+ dup_xpt->n_children = xpt->n_children;
+ dup_xpt->max_children = xpt->n_children; // Nb: don't copy max_children!
+ dup_xpt->children = VG_(malloc)(dup_xpt->max_children * sizeof(XPt*));
+ for (i = 0; i < xpt->n_children; i++) {
+ dup_xpt->children[i] = dup_XTree(xpt->children[i]);
+ }
+
+ n_dupd_xpts++;
+
+ return dup_xpt;
+}
+
+static void free_XTree(XPt* xpt)
+{
+ Int i;
+ // Free all children XPts, then the children array, then the XPt itself.
+ for (i = 0; i < xpt->n_children; i++) {
+ free_XTree(xpt->children[i]);
+ }
+ VG_(free)(xpt->children); xpt->children = NULL;
+ VG_(free)(xpt); xpt = NULL;
+
+ n_dupd_xpts_freed++;
+}
+
+
+// Take a snapshot. Note that with bigger depths, snapshots can be slow,
+// eg. konqueror snapshots can easily take 50ms!
+// [XXX: is that still true?]
+static void take_snapshot(void)
+{
+ static UInt ms_prev_snapshot = 0;
+ static UInt ms_next_snapshot = 0; // zero allows startup snapshot
+ static Int n_snapshots_since_last_detailed = 0;
+
+ Int ms_time, ms_time_since_prev;
+ Snapshot* snapshot;
+
+ // Only do a snapshot if it's time.
+ ms_time = VG_(read_millisecond_timer)();
+ ms_time_since_prev = ms_time - ms_prev_snapshot;
+ if (ms_time < ms_next_snapshot) {
+ n_fake_snapshots++;
+ return;
+ }
+
+ // Right! We're taking a real snapshot.
+ n_real_snapshots++;
+ snapshot = & snapshots[curr_snapshot];
+
+ // Heap -------------------------------------------------------------
+ if (clo_heap) {
+ snapshot->heap_szB = heap_szB;
+ // Take a detailed snapshot if it's been long enough since the last one.
+ if (DETAILED_SNAPSHOT_FREQ == n_snapshots_since_last_detailed) {
+ snapshot->alloc_xpt = dup_XTree(alloc_xpt);
+ n_snapshots_since_last_detailed = 0;
+ } else {
+ n_snapshots_since_last_detailed++;
+ }
+ }
+
+ // Heap admin -------------------------------------------------------
+ if (clo_heap_admin > 0) {
+ snapshot->heap_admin_szB = clo_heap_admin * n_heap_blocks;
+ }
+
+ // Stack(s) ---------------------------------------------------------
+ if (clo_stacks) {
+ ThreadId tid;
+ Addr stack_min, stack_max;
+ VG_(thread_stack_reset_iter)();
+ while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
+ snapshot->stacks_szB += (stack_max - stack_min);
+ }
+ snapshot->stacks_szB += sigstacks_szB; // Add signal stacks, too
+ }
+
+ // Finish writing snapshot ------------------------------------------
+ snapshot->ms_time = ms_time;
+ snapshot->total_szB =
+ snapshot->heap_szB + snapshot->heap_admin_szB + snapshot->stacks_szB;
+
+ // Update peak data -------------------------------------------------
+ // XXX: this is not really the right way to do peak data -- it's only
+ // peak snapshot data, the true peak could be between snapshots.
+ if (snapshot->total_szB > peak_snapshot_total_szB) {
+ peak_snapshot_total_szB = snapshot->total_szB;
+// VG_(printf)("new peak snapshot total szB = %ld B\n",
+// peak_snapshot_total_szB);
+ }
+
+// VG_(printf)("heap, admin, stacks: %ld, %ld, %ld B\n",
+// snapshot_heap_szB, snapshot_heap_admin_szB, snapshot_stacks_szB);
+
+ // Clean-ups
+ curr_snapshot++;
+ snapshot = NULL; // don't use again now that curr_snapshot changed
+
+ // Halve the entries, if our snapshot table is full
+ if (MAX_N_SNAPSHOTS == curr_snapshot) {
+ halve_snapshots();
+ }
+
+ // Take time for next snapshot from now, rather than when this snapshot
+ // should have happened. Because, if there's a big gap due to a kernel
+ // operation, there's no point doing catch-up snapshots every allocation
+ // for a while -- that would just give N snapshots at almost the same time.
+ if (VG_(clo_verbosity) > 1) {
+ VG_(message)(Vg_DebugMsg, "snapshot: %d ms (took %d ms)", ms_time,
+ VG_(read_millisecond_timer)() - ms_time );
+ }
+ ms_prev_snapshot = ms_time;
+ ms_next_snapshot = ms_time + ms_interval;
+}
+
+
+/*------------------------------------------------------------*/
/*--- Heap management ---*/
/*------------------------------------------------------------*/
@@ -758,10 +962,6 @@
}
}
-
-// Forward declaration
-static void hp_census(void);
-
static
void* new_block ( ThreadId tid, void* p, SizeT size, SizeT align,
Bool is_zeroed )
@@ -799,8 +999,8 @@
}
VG_(HT_add_node)(malloc_list, hc);
- // Do a census!
- hp_census();
+ // Do a snapshot!
+ take_snapshot();
return p;
}
@@ -832,8 +1032,8 @@
if (!custom_free)
VG_(cli_free)( p );
- // Do a census!
- hp_census();
+ // Do a snapshot!
+ take_snapshot();
}
static __inline__
@@ -945,181 +1145,6 @@
/*------------------------------------------------------------*/
-/*--- Taking a census ---*/
-/*------------------------------------------------------------*/
-
-static Census censi[MAX_N_CENSI];
-static UInt curr_census = 0; // Points to where next census will go.
-
-static UInt ms_interval;
-static UInt do_every_nth_census = 30;
-
-// Weed out half the censi; we choose those that represent the smallest
-// time-spans, because that loses the least information.
-//
-// Algorithm for N censi: We find the census representing the smallest
-// timeframe, and remove it. We repeat this until (N/2)-1 censi are gone.
-// (It's (N/2)-1 because we never remove the first and last censi.)
-// We have to do this one census at a time, rather than finding the (N/2)-1
-// smallest censi in one hit, because when a census is removed, it's
-// neighbours immediately cover greater timespans. So it's N^2, but N only
-// equals 200, and this is only done every 100 censi, which is not too often.
-static void halve_censi(void)
-{
- Int i, jp, j, jn;
- Census* min_census;
-
- n_halvings++;
- if (VG_(clo_verbosity) > 1)
- VG_(message)(Vg_UserMsg, "Halving censi...");
-
- // Sets j to the index of the first not-yet-removed census at or after i
- #define FIND_CENSUS(i, j) \
- for (j = i; j < MAX_N_CENSI && -1 == censi[j].ms_time; j++) { }
-
- for (i = 2; i < MAX_N_CENSI; i += 2) {
- // Find the censi representing the smallest timespan. The timespan
- // for census n = d(N-1,N)+d(N,N+1), where d(A,B) is the time between
- // censi A and B. We don't consider the first and last censi for
- // removal.
- Int min_span = 0x7fffffff;
- Int min_j = 0;
-
- // Initial triple: (prev, curr, next) == (jp, j, jn)
- jp = 0;
- FIND_CENSUS(1, j);
- FIND_CENSUS(j+1, jn);
- while (jn < MAX_N_CENSI) {
- Int timespan = censi[jn].ms_time - censi[jp].ms_time;
- tl_assert(timespan >= 0);
- if (timespan < min_span) {
- min_span = timespan;
- min_j = j;
- }
- // Move on to next triple
- jp = j;
- j = jn;
- FIND_CENSUS(jn+1, jn);
- }
- // We've found the least important census, now remove it
- min_census = & censi[ min_j ];
- min_census->ms_time = -1;
- }
-
- // Slide down the remaining censi over the removed ones. The '<=' is
- // because we are removing on (N/2)-1, rather than N/2.
- for (i = 0, j = 0; i <= MAX_N_CENSI / 2; i++, j++) {
- FIND_CENSUS(j, j);
- if (i != j) {
- censi[i] = censi[j];
- }
- }
- curr_census = i;
-
- // Double intervals
- ms_interval *= 2;
- do_every_nth_census *= 2;
-
- if (VG_(clo_verbosity) > 1)
- VG_(message)(Vg_UserMsg, "...done");
-}
-
-// Forward declaration.
-// XXX: necessary?
-static void pp_snapshot(SizeT curr_heap_szB, SizeT curr_heap_admin_szB,
- SizeT curr_stacks_szB, SizeT curr_total_szB);
-
-
-// Take a census. Census time seems to be insignificant (usually <= 0 ms,
-// almost always <= 1ms) so don't have to worry about subtracting it from
-// running time in any way.
-//
-// XXX: NOT TRUE! with bigger depths, konqueror censuses can easily take
-// 50ms!
-static void hp_census(void)
-{
- static UInt ms_prev_census = 0;
- static UInt ms_next_census = 0; // zero allows startup census
-
- SSizeT census_heap_szB = 0;
- SSizeT census_heap_admin_szB = 0;
- SSizeT census_stacks_szB = 0;
-
- Int ms_time, ms_time_since_prev;
- Census* census;
-
- // Only do a census if it's time
- ms_time = VG_(read_millisecond_timer)();
- ms_time_since_prev = ms_time - ms_prev_census;
- if (ms_time < ms_next_census) {
- n_fake_censi++;
- return;
- }
- n_real_censi++;
-
- census = & censi[curr_census];
-
- // Heap -------------------------------------------------------------
- if (clo_heap) {
- census_heap_szB = heap_szB;
- }
-
- // Heap admin -------------------------------------------------------
- if (clo_heap_admin > 0) {
- census_heap_admin_szB = clo_heap_admin * n_heap_blocks;
- }
-
- // Stack(s) ---------------------------------------------------------
- if (clo_stacks) {
- ThreadId tid;
- Addr stack_min, stack_max;
- VG_(thread_stack_reset_iter)();
- while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
- census_stacks_szB += (stack_max - stack_min);
- }
- census_stacks_szB += sigstacks_szB; // Add signal stacks, too
- }
-
- // Write out census -------------------------------------------------
- census->ms_time = ms_time;
- census->total_szB =
- census_heap_szB + census_heap_admin_szB + census_stacks_szB;
-// VG_(printf)("heap, admin, stacks: %ld, %ld, %ld B\n",
-// census_heap_szB, census_heap_admin_szB, census_stacks_szB);
- if (census->total_szB > peak_census_total_szB) {
- peak_census_total_szB = census->total_szB;
- VG_(printf)("new peak census total szB = %ld B\n", peak_census_total_szB);
- }
-
- // Print the significant part of the XTree [XXX: temporary]
- if (clo_heap)
- pp_snapshot(census_heap_szB, census_heap_admin_szB,
- census_stacks_szB, census->total_szB);
-
- // Clean-ups
- curr_census++;
- census = NULL; // don't use again now that curr_census changed
-
- // Halve the entries, if our census table is full
- if (MAX_N_CENSI == curr_census) {
- halve_censi();
- }
-
- // Take time for next census from now, rather than when this census
- // should have happened. Because, if there's a big gap due to a kernel
- // operation, there's no point doing catch-up censi every allocation for
- // a while -- that would just give N censi at almost the same time.
- if (VG_(clo_verbosity) > 1) {
- VG_(message)(Vg_UserMsg, "census: %d ms (took %d ms)", ms_time,
- VG_(read_millisecond_timer)() - ms_time );
- }
- ms_prev_census = ms_time;
- ms_next_census = ms_time + ms_interval;
-
- //VG_(printf)("Next: %d ms\n", ms_next_census);
-}
-
-/*------------------------------------------------------------*/
/*--- Tracked events ---*/
/*------------------------------------------------------------*/
@@ -1177,7 +1202,7 @@
if (is_first_SB) {
// Do an initial sample for t = 0
- hp_census();
+ take_snapshot();
is_first_SB = False;
}
@@ -1200,22 +1225,6 @@
return filename;
}
-// Make string acceptable to hp2ps (sigh): remove spaces, escape parentheses.
-static Char* clean_fnname(Char *d, Char* s)
-{
- Char* dorig = d;
- while (*s) {
- if (' ' == *s) { *d = '%'; }
- else if ('(' == *s) { *d++ = '\\'; *d = '('; }
- else if (')' == *s) { *d++ = '\\'; *d = ')'; }
- else { *d = *s; };
- s++;
- d++;
- }
- *d = '\0';
- return dorig;
-}
-
static void file_err ( Char* file )
{
VG_(message)(Vg_UserMsg, "error: can't open output file '%s'", file );
@@ -1227,14 +1236,13 @@
static void write_text_graph(void)
{
- Int i /*,j*/;
+ Int i;
Int x, y; // y must be signed!
Int end_ms_time;
Char unit;
Int orders_of_magnitude;
- SizeT peak_census_total_szScaled;
+ SizeT peak_snapshot_total_szScaled;
-
// XXX: unhardwire the sizes later
#define GRAPH_X 72
#define GRAPH_Y 20
@@ -1245,77 +1253,59 @@
// The rest ([1][1]..[GRAPH_X][GRAPH_Y]) is the usable graph area.
Char graph[GRAPH_X+1][GRAPH_Y+1];
- // Setup the graph
- graph[0][0] = '+'; // axes join point
- for (x = 1; x <= GRAPH_X; x++) { // x-axis
- graph[x][0] = '-';
- }
- for (y = 1; y <= GRAPH_Y; y++) { // y-axis
- graph[0][y] = '|';
- }
- for (x = 1; x <= GRAPH_X; x++) { // usable area
- for (y = 1; y <= GRAPH_Y; y++) {
- graph[x][y] = ' ';
- }
- }
-
- // We increment end_ms_time by 1 so that the last census occurs just
+ // We increment end_ms_time by 1 so that the last snapshot occurs just
// before it, and doesn't spill over into the final column.
- tl_assert(curr_census > 0);
- end_ms_time = censi[curr_census-1].ms_time + 1;
+ tl_assert(curr_snapshot > 0);
+ end_ms_time = snapshots[curr_snapshot-1].ms_time + 1;
tl_assert(end_ms_time > 0);
- tl_assert(peak_census_total_szB > 0);
- P("end time = %d ms\n", end_ms_time);
- P("peak census total szB = %ld B\n", peak_census_total_szB);
- // Header, including command line
- P("cmd: ");
- if (VG_(args_the_exename)) {
- P("%s", VG_(args_the_exename));
- for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
- HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
- if (arg)
- P(" %s", arg);
+ // Setup graph[][].
+ graph[0][0] = '+'; // axes join point
+ for (x = 1; x <= GRAPH_X; x++) { graph[x][0] = '-'; } // x-axis
+ for (y = 1; y <= GRAPH_Y; y++) { graph[0][y] = '|'; } // y-axis
+ for (x = 1; x <= GRAPH_X; x++) { // usable area
+ for (y = 1; y <= GRAPH_Y; y++) {
+ graph[x][y] = ' ';
}
- } else {
- P(" ???");
}
- P("\n");
- // Censi
- for (i = 0; i < curr_census; i++) {
- Census* census = & censi[i];
+ // Write snapshot bars into graph[][].
+ // XXX: many detailed snapshot bars are being overwritten by
+ for (i = 0; i < curr_snapshot; i++) {
+ Snapshot* snapshot = & snapshots[i];
+ Bool is_detailed = ( snapshot->alloc_xpt ? True : False );
// Work out how many bytes each row represents.
- double per_row_full_thresh_szB = (double)peak_census_total_szB / GRAPH_Y;
+ double per_row_full_thresh_szB = (double)peak_snapshot_total_szB / GRAPH_Y;
double per_row_half_thresh_szB = per_row_full_thresh_szB / 2;
- // Work out which column this census belongs to.
- double bar_x_pos_frac = ((double)census->ms_time / end_ms_time) * GRAPH_X;
- int bar_x_pos = (int)bar_x_pos_frac + 1; // +1 due to y-axis
- // XXX: why is the 0 one not getting drawn?
- P("n: %d\n", bar_x_pos);
- tl_assert(1 <= bar_x_pos && bar_x_pos <= GRAPH_X);
+ // Work out which column this snapshot belongs to.
+ double x_pos_frac = ((double)snapshot->ms_time / end_ms_time) * GRAPH_X;
+ x = (int)x_pos_frac + 1; // +1 due to y-axis
- // Grow this census bar from bottom to top.
+ // Grow this snapshot bar from bottom to top.
for (y = 1; y <= GRAPH_Y; y++) {
double this_row_full_thresh_szB = y * per_row_full_thresh_szB;
double this_row_half_thresh_szB =
this_row_full_thresh_szB - per_row_half_thresh_szB;
- graph[bar_x_pos][y] = ' ';
- if (census->total_szB >= this_row_half_thresh_szB)
- graph[bar_x_pos][y] = '.';
- if (census->total_szB >= this_row_full_thresh_szB)
- graph[bar_x_pos][y] = ':';
+ graph[x][y] = ' ';
+ if (snapshot->total_szB >= this_row_half_thresh_szB)
+ graph[x][y] = '.';
+ if (snapshot->total_szB >= this_row_full_thresh_szB)
+ graph[x][y] = ( is_detailed ? '|' : ':' );
}
+ // If it's detailed, mark the x-axis
+ if (is_detailed)
+ graph[x][0] = '|';
}
+ // Work out the units for the y-axis.
orders_of_magnitude = 0;
- peak_census_total_szScaled = peak_census_total_szB;
- while (peak_census_total_szScaled > 1000) {
+ peak_snapshot_total_szScaled = peak_snapshot_total_szB;
+ while (peak_snapshot_total_szScaled > 1000) {
orders_of_magnitude++;
- peak_census_total_szScaled /= 1000;
+ peak_snapshot_total_szScaled /= 1000;
}
switch (orders_of_magnitude) {
case 0: unit = ' '; break;
@@ -1327,12 +1317,28 @@
tl_assert2(0, "unknown order of magnitude: %d", orders_of_magnitude);
}
- // Print graph
+ // Print graph header, including command line.
+ P("-- start graph header --\n");
+ P("cmd: ");
+ if (VG_(args_the_exename)) {
+ P("%s", VG_(args_the_exename));
+ for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
+ HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
+ if (arg)
+ P(" %s", arg);
+ }
+ } else {
+ P(" ???");
+ }
+ P("\n");
+ P("-- end graph header --\n");
+
+ // Print graph[][].
P("-- start graph --\n");
for (y = GRAPH_Y; y >= 0; y--) {
// Row prefix
if (GRAPH_Y == y) // top point
- P("%3lu%c", peak_census_total_szScaled, unit);
+ P("%3lu%c", peak_snapshot_total_szScaled, unit);
else if (0 == y) // bottom point
P(" 0 ");
else // anywhere else
@@ -1345,29 +1351,24 @@
P("\n");
}
P("-- end graph --\n");
+
+ // Print graph legend.
+ P("-- start graph legend --\n");
+ for (i = 0; i < curr_snapshot; i++) {
+ Snapshot* snapshot = & snapshots[i];
+ Bool is_detailed = ( snapshot->alloc_xpt ? True : False );
+ if (is_detailed) {
+ P(" snapshot %3d, t = %,12d ms, size = %,12ld bytes\n",
+ i, snapshot->ms_time, snapshot->total_szB);
+ }
+ }
+ P("-- end graph legend --\n");
}
/*------------------------------------------------------------*/
-/*--- Writing the output ---*/
+/*--- Writing snapshots ---*/
/*------------------------------------------------------------*/
-#if 0
-static void percentify(Int n, Int pow, Int field_width, char xbuf[])
-{
- int i, len, space;
-
- VG_(sprintf)(xbuf, "%d.%d%%", n / pow, n % pow);
- len = VG_(strlen)(xbuf);
- space = field_width - len;
- if (space < 0) space = 0; /* Allow for v. small field_width */
- i = len;
-
- /* Right justify in field */
- for ( ; i >= 0; i--) xbuf[i + space] = xbuf[i];
- for (i = 0; i < space; i++) xbuf[i] = ' ';
-}
-#endif
-
// Nb: uses a static buffer, each call trashes the last string returned.
static Char* make_perc(ULong x, ULong y)
{
@@ -1381,35 +1382,6 @@
return mbuf;
}
-#if 0
-// Nb: passed in XPt is a lower-level XPt; IPs are grabbed from
-// bottom-to-top of XCon, and then printed in the reverse order.
-static UInt pp_XCon(XPt* xpt)
-{
- Addr rev_ips[clo_depth+1];
- Int i = 0;
- Int n = 0;
-
- tl_assert(NULL != xpt);
-
- while (True) {
- rev_ips[i] = xpt->ip;
- n++;
- if (alloc_xpt == xpt->parent) break;
- i++;
- xpt = xpt->parent;
- }
-
- for (i = n-1; i >= 0; i--) {
- // -1 means point to calling line
- VG_(describe_IP)(rev_ips[i]-1, buf2, BUF_LEN);
- P(" %s\n", buf2);
- }
-
- return n;
-}
-#endif
-
// Does the xpt account for >= 1% of total memory used?
// XXX: make command-line controllable?
static Bool is_significant_XPt(XPt* xpt, SizeT curr_total_szB)
@@ -1426,8 +1398,6 @@
Bool child_is_last_sibling;
Char* ip_desc, *perc;
- // XXX: don't want to see 0xFFFFFFFE entries
-
// Check that the sum of all children's sizes equals the parent's size.
SizeT children_sum_szB = 0;
for (i = 0; i < parent->n_children; i++) {
@@ -1488,6 +1458,8 @@
UInt n_insig = parent->n_children - i;
Char* s = ( n_insig == 1 ? "" : "s" );
Char* other = ( 0 == i ? "" : "other " );
+ // XXX: should give the percentage. be careful when computing
+ // it...
P("->the rest in %d %sinsignificant place%s\n", n_insig, other, s);
P("%s\n", depth_str);
return;
@@ -1495,40 +1467,52 @@
}
}
-static void pp_snapshot(SizeT curr_heap_szB, SizeT curr_heap_admin_szB,
- SizeT curr_stacks_szB, SizeT curr_total_szB)
+static void pp_snapshot(Snapshot* snapshot, Int snapshot_n)
{
- Int depth_str_len = clo_depth * 2 + 2;
+ Int depth_str_len = clo_depth * 2 + 2;
Char* depth_str = VG_(malloc)(sizeof(Char) * depth_str_len);
depth_str[0] = '\0'; // Initialise to "".
P("=================================\n");
- P("== snapshot\n");
+ P("== snapshot %d\n", snapshot_n);
P("=================================\n");
- P("Total memory usage: %,12lu bytes\n", curr_total_szB);
+ P("Total memory usage: %,12lu bytes\n", snapshot->total_szB);
P("Useful heap usage : %,12lu bytes (%s)\n",
- curr_heap_szB, make_perc(curr_heap_szB, curr_total_szB));
+ snapshot->heap_szB,
+ make_perc(snapshot->heap_szB, snapshot->total_szB));
P("Admin heap usage : %,12lu bytes (%s)\n",
- curr_heap_admin_szB, make_perc(curr_heap_admin_szB, curr_total_szB));
+ snapshot->heap_admin_szB,
+ make_perc(snapshot->heap_admin_szB, snapshot->total_szB));
P("Stacks usage : %,12lu bytes (%s)\n",
- curr_stacks_szB, make_perc(curr_stacks_szB, curr_total_szB));
+ snapshot->stacks_szB,
+ make_perc(snapshot->stacks_szB, snapshot->total_szB));
- if (0 == curr_heap_szB) {
+ if (0 == snapshot->heap_szB) {
P("(No heap memory currently allocated)\n");
} else {
P("Heap tree:\n");
P("%6s: (heap allocation functions) malloc/new/new[],"
" --alloc-fn functions, etc.\n",
- make_perc(curr_heap_szB, curr_total_szB));
+ make_perc(snapshot->heap_szB, snapshot->total_szB));
pp_snapshot_child_XPts(alloc_xpt, 0, depth_str, depth_str_len,
- curr_heap_szB, curr_total_szB);
+ snapshot->heap_szB, snapshot->total_szB);
}
- P("\n");
VG_(free)(depth_str);
}
+static void write_snapshots(void)
+{
+ Int i;
+ for (i = 0; i < curr_snapshot; i++) {
+ Snapshot* snapshot = & snapshots[i];
+ if (snapshot->alloc_xpt) {
+ pp_snapshot(snapshot, i); // Detailed snapshot!
+ }
+ }
+}
+
/*------------------------------------------------------------*/
/*--- Finalisation ---*/
/*------------------------------------------------------------*/
@@ -1536,27 +1520,29 @@
static void ms_fini(Int exit_status)
{
// Do a final (empty) sample to show program's end
- hp_census();
+ take_snapshot();
// Output.
write_text_graph();
+ write_snapshots();
// Stats
if (VG_(clo_verbosity) > 1) {
tl_assert(n_xpts > 0); // always have alloc_xpt
- VG_(message)(Vg_DebugMsg, " allocs: %u", n_allocs);
- VG_(message)(Vg_DebugMsg, "zeroallocs: %u (%d%%)", n_zero_allocs,
- n_zero_allocs * 100 / n_allocs );
- VG_(message)(Vg_DebugMsg, " frees: %u", n_frees);
- VG_(message)(Vg_DebugMsg, " XPts: %u (%d B)", n_xpts,
- n_xpts*sizeof(XPt));
- VG_(message)(Vg_DebugMsg, " top-XPts: %u (%d%%)", alloc_xpt->n_children,
- alloc_xpt->n_children * 100 / n_xpts);
- VG_(message)(Vg_DebugMsg, "c-reallocs: %u", n_children_reallocs);
- VG_(message)(Vg_DebugMsg, "fake censi: %u", n_fake_censi);
- VG_(message)(Vg_DebugMsg, "real censi: %u", n_real_censi);
- VG_(message)(Vg_DebugMsg, " halvings: %u", n_halvings);
- VG_(message)(Vg_DebugMsg, "XCon_redos: %u", n_getXCon_redo);
+ VG_(message)(Vg_DebugMsg, " allocs: %u", n_allocs);
+ VG_(message)(Vg_DebugMsg, "zeroallocs: %u (%d%%)", n_zero_allocs,
+ n_zero_allocs * 100 / n_allocs );
+ VG_(message)(Vg_DebugMsg, " frees: %u", n_frees);
+ VG_(message)(Vg_DebugMsg, " XPts: %u", n_xpts);
+ VG_(message)(Vg_DebugMsg, " top-XPts: %u (%d%%)",
+ alloc_xpt->n_children, alloc_xpt->n_children * 100 / n_xpts);
+ VG_(message)(Vg_DebugMsg, "dup'd XPts: %u", n_dupd_xpts);
+ VG_(message)(Vg_DebugMsg, "dup'd/freed XPts:%u", n_dupd_xpts_freed);
+ VG_(message)(Vg_DebugMsg, "c-reallocs: %u", n_children_reallocs);
+ VG_(message)(Vg_DebugMsg, "fake snapshots: %u", n_fake_snapshots);
+ VG_(message)(Vg_DebugMsg, "real snapshots: %u", n_real_snapshots);
+ VG_(message)(Vg_DebugMsg, " halvings: %u", n_halvings);
+ VG_(message)(Vg_DebugMsg, "XCon_redos: %u", n_getXCon_redo);
}
}
@@ -1574,9 +1560,9 @@
}
}
- ms_interval = 1;
+ ms_interval = 5;
- // We don't take a census now, because there's still some core
+ // We don't take a snapshot now, because there's still some core
// initialisation to do, in which case we have an artificial gap.
// Instead we do it when the first translation occurs. See
// ms_instrument().
|
|
From: <sv...@va...> - 2007-03-28 12:16:58
|
Author: njn
Date: 2007-03-28 13:16:55 +0100 (Wed, 28 Mar 2007)
New Revision: 6682
Log:
add a comment
Modified:
trunk/include/pub_tool_libcproc.h
Modified: trunk/include/pub_tool_libcproc.h
===================================================================
--- trunk/include/pub_tool_libcproc.h 2007-03-28 07:36:31 UTC (rev 6681)
+++ trunk/include/pub_tool_libcproc.h 2007-03-28 12:16:55 UTC (rev 6682)
@@ -75,6 +75,9 @@
Timing
------------------------------------------------------------------ */
+// Returns the number of milliseconds passed since the program started
+// (roughly; it gets initialised partway through Valgrind's initialisation
+// steps).
extern UInt VG_(read_millisecond_timer) ( void );
#endif // __PUB_TOOL_LIBCPROC_H
|
|
From: <js...@ac...> - 2007-03-28 11:07:19
|
Nightly build on minnie ( SuSE 10.0, ppc32 ) started at 2007-03-28 09:00:01 BST Results unchanged from 24 hours ago Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 219 tests, 10 stderr failures, 6 stdout failures, 0 posttest failures == memcheck/tests/leak-tree (stderr) memcheck/tests/leakotron (stdout) memcheck/tests/pointer-trace (stderr) memcheck/tests/stack_changes (stderr) memcheck/tests/xml1 (stderr) none/tests/faultstatus (stderr) none/tests/fdleak_cmsg (stderr) none/tests/mremap (stderr) none/tests/mremap2 (stdout) none/tests/ppc32/jm-fp (stdout) none/tests/ppc32/jm-fp (stderr) none/tests/ppc32/round (stdout) none/tests/ppc32/round (stderr) none/tests/ppc32/test_fx (stdout) none/tests/ppc32/test_fx (stderr) none/tests/ppc32/test_gx (stdout) |
|
From: <sv...@va...> - 2007-03-28 07:36:36
|
Author: njn
Date: 2007-03-28 08:36:31 +0100 (Wed, 28 Mar 2007)
New Revision: 6681
Log:
- added some notes
- took out the overloadable C++ new/new[] operators from alloc-fns --
someone might want to see them
Modified:
branches/MASSIF2/massif/ms_main.c
Modified: branches/MASSIF2/massif/ms_main.c
===================================================================
--- branches/MASSIF2/massif/ms_main.c 2007-03-28 01:27:05 UTC (rev 6680)
+++ branches/MASSIF2/massif/ms_main.c 2007-03-28 07:36:31 UTC (rev 6681)
@@ -31,6 +31,11 @@
//---------------------------------------------------------------------------
// XXX:
//---------------------------------------------------------------------------
+// Next:
+// - truncate really long file names [hmm, could make getting the name of
+// alloc-fns more difficult]
+// - Check MALLOCLIKE_BLOCK works, write regtest
+//
// Separate content from presentation by dumping all results to a file and
// then post-processing with a separate program, a la Cachegrind?
// - work out the file format
@@ -50,14 +55,15 @@
// 132132 nor massif --format=html output does not do html entity escaping
// - only for HTML output, irrelevant, ignore
//
-// FIXED:
+// FIXED/NOW IRRELEVANT:
// 142197 nor massif tool ignores --massif:alloc-fn parameters in .valg...
// - fixed in trunk
// 142491 nor Maximise use of alloc_fns array
// - addressed, using the patch (with minor changes) from the bug report
+// 89061 cra Massif: ms_main.c:485 (get_XCon): Assertion `xpt->max_chi...
+// - relevant code now gone
//
// TODO:
-// 89061 cra Massif: ms_main.c:485 (get_XCon): Assertion `xpt->max_chi...
// 141631 nor Massif: percentages don't add up correctly
// 142706 nor massif numbers don't seem to add up
// 143062 cra massif crashes on app exit with signal 8 SIGFPE
@@ -82,7 +88,19 @@
// matches an alloc-fn, that entry *and all above it* are removed. So you
// can cut out alloc-fn chains at the bottom, rather than having to name
// all of them, which is better.
+// - Mention that the C++ overloadable new/new[] operators aren't included in
+// alloc-fns by default.
+// - Mention that complex functions names are best protected with single
+// quotes, eg:
+// --alloc-fn='operator new(unsigned, std::nothrow_t const&)'
+// [XXX: that doesn't work if the option is in a .valgrindrc file or in
+// $VALGRIND_OPTS. In m_commandline.c:add_args_from_string() need to
+// respect single quotes...]
//
+// Tests:
+// - tests/overloaded_new.cpp is there
+// - one involving MALLOCLIKE
+//
//---------------------------------------------------------------------------
// Memory profiler. Produces a graph, gives lots of information about
@@ -118,9 +136,20 @@
// Heap blocks are tracked, and the amount of space allocated by various
// contexts (ie. lines of code, more or less) is also tracked.
-// Periodically, a census is taken. There are two
-//
-//
+// "Snapshots", ie. detailed recordings of the memory usage, are taken every
+// so often. There are two kinds of snapshot:
+// - Temporary: Massif does a temporary snapshot every so often. The idea
+// is to always have a certain number of temporary snapshots around. So
+// we take them frequently to begin with, but decreasingly often as the
+// program continues to run. Also, we remove some old ones after a while.
+// Overall it's a kind of exponential decay thing.
+// - Permanent: Massif takes a permanent snapshot in some circumstances.
+// They are:
+// - Peak snapshot: When the memory usage peak is reached, it takes a
+// snapshot. It keeps this, unless the peak is subsequently exceeded,
+// in which case it will overwrite the peak snapshot.
+// - User-requested snapshots: These are done in response to client
+// requests. They are always kept.
//
// 100M|B . :A
// | .::: :::#
@@ -137,14 +166,24 @@
// | :::::|:::::::|:::#:::|:::::: g::::::::
// | a:::::|:::::::|:::#:::|:::::::e: ::|::::::::::h
// | |:::::|:::::::|:::#:::|:::::::|::. :: .:::|::::::::::|::
-// 25M^| |:::::|:::::::|:::#:::|:::::::|:::: f:::::::|::::::::::|::
-// 20M| :|:::::|:::::::|:::#:::|:::::::|::::. .::|:::::::|::::::::::|::
-// 15M| .::|:::::|:::::::|:::#:::|:::::::|::::::::::|:::::::|::::::::::|::
-// 10M| .::::|:::::|:::::::|:::#:::|:::::::|::::::::::|:::::::|::::::::::|::
-// 5M|:::::::|:::::|:::::::|:::#:::|:::::::|::::::::::|:::::::|::::::::::|::
+// 25M| |:::::|:::::::|:::#:::|:::::::|:::: f:::::::|::::::::::|::
+// | :|:::::|:::::::|:::#:::|:::::::|::::. .::|:::::::|::::::::::|::
+// | .::|:::::|:::::::|:::#:::|:::::::|::::::::::|:::::::|::::::::::|::
+// | .::::|:::::|:::::::|:::#:::|:::::::|::::::::::|:::::::|::::::::::|::
+// |:::::::|:::::|:::::::|:::#:::|:::::::|::::::::::|:::::::|::::::::::|::
// 0M+----------------------------------------------------------------------t
// 012
//
+// Temporary snapshots:
+// a: periodic snapshot, total size: 33,000,000 bytes
+// b: periodic snapshot, total size: ... bytes
+// c: periodic snapshot, total size: ... bytes
+// d: periodic snapshot, total size: ... bytes
+// e: periodic snapshot, total size: ... bytes
+// f: periodic snapshot, total size: ... bytes
+// g: periodic snapshot, total size: ... bytes
+// h: periodic snapshot, total size: ... bytes
+//
// Explanation of y-axis:
// - Top of the x-axis box represents 0.
//
@@ -168,54 +207,6 @@
// - etc.
-
-
-// Ideas:
-// - Graph has 72 columns of data, 0..71
-// - Once we've had the first 72, always have 72.
-// - On number 73, remove column 1, (0,2..71)
-// - On number 74, remove column 2, (0,2,4..71)
-// - On number 75, remove column 3, (0,2,4,6..71)
-// - On number 72+n, remove col n, (0,2,4,6,...,2n..71)
-// - ...
-// - On number 107, remove column 35, (0,2,4,6,...,70..71)
-//
-
-// column: 00 01 02 03 04 05 06 07 08 09 10 11 12
-// census --------------------------------------
-// 00 00
-// 01 00 01
-// 02 00 01 02
-// ...
-// 11 00 01 02 03 04 05 06 07 08 09 10 11
-// 12 00 01 02 03 04 05 06 07 08 09 10 11 12
-// 13 00 02 03 04 05 06 07 08 09 10 11 12 13 removed col 01
-// 14 00 02 04 05 06 07 08 09 10 11 12 13 14 removed col 02
-// 15 00 02 04 06 07 08 09 10 11 12 13 14 15 removed col 03
-// 16 00 02 04 06 08 09 10 11 12 13 14 15 16 removed col 04
-// 17 00 02 04 06 08 10 11 12 13 14 15 16 17 removed col 05
-// 18 00 02 04 06 08 10 12 13 14 15 16 17 18 removed col 06
-// 19 00 02 04 06 08 10 12 14 15 16 17 18 19 removed col 07
-// 20 00 02 04 06 08 10 12 14 16 17 18 19 20 removed col 08
-// 21 00 02 04 06 08 10 12 14 16 18 19 20 21 removed col 09
-// 22 00 02 04 06 08 10 12 14 16 18 20 21 22 removed col 10
-// 23 00 02 04 06 08 10 12 14 16 18 20 22 23 removed col 11
-// 24 00 02 04 06 08 10 12 14 16 18 20 22 24 removed col 12
-//
-// Problem with this is that we don't have an even x-axis distribution.
-// Getting such a distribution is difficult in general.
-//
-//
-
-
-
-// - like Callgrind, allow multiple data sets to be dumped, by choosing points.
-// That way you could print one graph per phase, for example, for better
-// granularity.
-
-
-
-
// Periodically, a census is taken, and the amount of space used, at that
// point, by the most significant (highly allocating) contexts is recorded.
// Censuses start off frequently, but are scaled back as the program goes on,
@@ -394,15 +385,17 @@
#define MAX_ALLOC_FNS 128 // includes the builtin ones
// First few filled in, rest should be zeroed. Zero-terminated vector.
+// Nb: I used to have the following four C++ global overloadable allocators
+// in alloc_fns:
+// operator new(unsigned)
+// operator new[](unsigned)
+// operator new(unsigned, std::nothrow_t const&)
+// operator new[](unsigned, std::nothrow_t const&)
+// But someone might be interested in seeing them. If they're not, they can
+// specify them with --alloc-fn.
static UInt n_alloc_fns = 10;
static Char* alloc_fns[MAX_ALLOC_FNS] = {
"malloc",
- // XXX: maybe these four shouldn't be in here? Someone might want to see
- // inside them...
- "operator new(unsigned)",
- "operator new[](unsigned)",
- "operator new(unsigned, std::nothrow_t const&)",
- "operator new[](unsigned, std::nothrow_t const&)",
"__builtin_new",
"__builtin_vec_new",
"calloc",
|
|
From: Nicholas N. <nj...@cs...> - 2007-03-28 03:46:34
|
On Tue, 27 Mar 2007, Vince Weaver wrote: > I was reading the PLDI paper and just wanted to say it's great work. I > wish all this info were available last year when I first started writing > some valgrind tools. Thanks! I see your tools at http://www.csl.cornell.edu/~vince/software.html, are they publically available? > One thing, in the paper it is mentioned that galgel wouldn't compile with > gfortran. The secret to getting it to compile is the > "-ffixed-form" > option. Ah, of course :/ Good to know for the future, thanks. > Also, in previous e-mails when I was complaining about code quality in the > spec2k benchmarks, I was of course excepting bzip2 in that analysis ;) I too have never had any problems with bzip2, unlike many of the others. Nick |
|
From: Nicholas N. <nj...@cs...> - 2007-03-28 03:33:21
|
Hi all,
Julian and I have been discussing a list of Valgrind design and development
goals. We're thinking this would be suitable for putting on the website
somewhere. This started after I looked at GCC's design goals (which are
really vague and thus not much good to anyone), and I realised we didn't
have any written down.
Below is a tentative statement of Valgrind design and development goals
and rationales for discussion. The rationale for each point is
presented in double parentheses, ((like this)).
There are 10 points. Following those are some background comments
on techniques that have been used to achieve these goals.
The idea is not to set anything in stone, but to give an indication of the
relative priorities, which can serve as a useful guide when considering
changes and new features. Comments are welcome.
Nick
------ Highest priority ------
1 Robustness: should be able to correctly run as many programs as
possible on the platforms we support.
((Systems which cannot be relied on to handle the vast majority of
presented workloads soon fall out of favour with users.))
2 Accuracy of outputs: debugging and profiling information generated
by the tools should be sufficiently accurate as to be both useful to
and credible to users. For example, bug detectors should have
minimal false positives.
((Tools which produce unreliable or non-credible results soon fall
out of favour with users.))
3 Design simplicity: the design and implementation should be easy to
understand, maintain, test and verify.
((Our engineering resources are very limited, so the code base
should be structured to make best use of them. Also, a simple code
base is more accessible to newcomers. Simplicity also usually
helps robustness and accuracy (1 and 2).))
4 Instrumentation capabilities: firstly, provide enough capabilities
to keep Memcheck going. Then conservatively add capabilities as required
by other tools.
((We aim for Valgrind to be an effective framework for building
dynamic analysis tools, so it needs to provide instrumentation
capabilities as required by current and emerging tools.))
------ Medium priority ------
5 Performance (speed and memory usage) of heavyweight tools. This
covers the speed of both the Valgrind core and the tool components.
((All else being equal, faster and less space hungry tools are able
to handle larger workloads and so are more useful. Also, users
prefer fast tools.))
6 Usability: it should be possible for users to use the tools without
excessive complication or inconvenience.
((Tools which are difficult or inconvenient to use soon fall out
of favour with users.))
7 New tools: encourage development of new tools as needs and
opportunities develop.
((The needs of users and the general computing landscape change
slowly over time, and it is important to remain relevant.))
8 Portability: want to avoid platform-specific techniques.
((Platform specific techniques and assumptions are clearly a
hindrance to portability. We have also found them to sometimes
reduce stability.))
------ Low priority ------
9 Performance of lightweight tools.
((Heavyweight tools (eg. Memcheck) are both harder to construct and
more valuable to users than simple ones (eg, instruction counters,
memory trace generators), and where design choices conflict they have
usually been resolved in favour of supporting heavyweight tools
better.))
10 New platforms: although we want portability, we don't want to
support a lot of platforms, because it's hard work, especially to
do it to a high quality.
((Same as 3: our engineering resources are limited and so we should
focus effort on the most widely used platforms.))
Here are some techniques that in the past have been used to achieve
the above goals. This list does not claim to be complete.
- Valgrind's code representation (IR) favours powerful instrumentation
capabilities. This allows heavyweight tools such as Memcheck to be
built and have reasonable performance. However, it gives poor
performance for lightweight tools such as trace collectors. Such
lightweight tools will have better performance if written in other
DBI frameworks such as Pin or DynamoRIO.
- Valgrind does not use libc or any other library itself. This used
to not be true, but it caused robustness and portability problems.
- Valgrind makes very few assumptions about memory layout. This used
to not be true, but it caused robustness and portability problems.
For example, Memcheck's two-level shadow memory representation means
its shadow memory can be laid out very flexibly, but it is not
particularly fast. A "half-and-half" representation that stores
shadow memory can be faster, but fails on some programs, some Linux
kernel configurations, and is incompatible with other OSes such as
Mac OS X.
- Valgrind used to use x86 segmentation to prevent a client program
from accidentally clobbering Valgrind data. However, this is not
portable to other platforms, and it was removed once support for
other platforms was added. Having such platform-specific features
hinders portability and makes testing harder.
- Valgrind serialises thread execution. For subtle atomicity reasons,
this is necessary for tools (like Memcheck) that use shadow values.
It means they can not use more than one processor at a time on
multiprocessor machines.
- Assertions and sanity checkers. The code base contains a large
number of assertions. Additionally, many subsystems have sanity
check code which periodically checks important data structures in
detail. In some cases these checkers are permanently enabled, even
at the cost of some performance. These include: the IR intermediate
representation, the address space manager (m_aspacemgr), the
translation table manager (m_transtab), some parts of Memcheck. One
side effect is that Valgrind almost never segfaults - instead if it
fails it does so by failing one of these assertions or sanity
checks.
|
|
From: Vince W. <vi...@cs...> - 2007-03-28 02:55:55
|
Hello I was reading the PLDI paper and just wanted to say it's great work. I wish all this info were available last year when I first started writing some valgrind tools. One thing, in the paper it is mentioned that galgel wouldn't compile with gfortran. The secret to getting it to compile is the "-ffixed-form" option. Also, in previous e-mails when I was complaining about code quality in the spec2k benchmarks, I was of course excepting bzip2 in that analysis ;) Vince |
|
From: Tom H. <th...@cy...> - 2007-03-28 02:39:35
|
Nightly build on alvis ( i686, Red Hat 7.3 ) started at 2007-03-28 03:15:02 BST Results unchanged from 24 hours ago Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 256 tests, 27 stderr failures, 1 stdout failure, 0 posttest failures == memcheck/tests/addressable (stderr) memcheck/tests/badjump (stderr) memcheck/tests/describe-block (stderr) memcheck/tests/erringfds (stderr) memcheck/tests/leak-0 (stderr) memcheck/tests/leak-cycle (stderr) memcheck/tests/leak-pool-0 (stderr) memcheck/tests/leak-pool-1 (stderr) memcheck/tests/leak-pool-2 (stderr) memcheck/tests/leak-pool-3 (stderr) memcheck/tests/leak-pool-4 (stderr) memcheck/tests/leak-pool-5 (stderr) memcheck/tests/leak-regroot (stderr) memcheck/tests/leak-tree (stderr) memcheck/tests/long_namespace_xml (stderr) memcheck/tests/match-overrun (stderr) memcheck/tests/partial_load_dflt (stderr) memcheck/tests/partial_load_ok (stderr) memcheck/tests/partiallydefinedeq (stderr) memcheck/tests/pointer-trace (stderr) memcheck/tests/sigkill (stderr) memcheck/tests/stack_changes (stderr) memcheck/tests/x86/scalar (stderr) memcheck/tests/x86/scalar_supp (stderr) memcheck/tests/x86/xor-undef-x86 (stderr) memcheck/tests/xml1 (stderr) none/tests/mremap (stderr) none/tests/mremap2 (stdout) |
|
From: Tom H. <th...@cy...> - 2007-03-28 02:23:49
|
Nightly build on dellow ( x86_64, Fedora Core 6 ) started at 2007-03-28 03:10:08 BST Results differ from 24 hours ago Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 291 tests, 4 stderr failures, 1 stdout failure, 0 posttest failures == memcheck/tests/pointer-trace (stderr) memcheck/tests/x86/scalar (stderr) memcheck/tests/xml1 (stderr) none/tests/mremap (stderr) none/tests/mremap2 (stdout) ================================================= == Results from 24 hours ago == ================================================= Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 291 tests, 4 stderr failures, 2 stdout failures, 0 posttest failures == memcheck/tests/pointer-trace (stderr) memcheck/tests/x86/scalar (stderr) memcheck/tests/xml1 (stderr) none/tests/mremap (stderr) none/tests/mremap2 (stdout) none/tests/pth_detached (stdout) ================================================= == Difference between 24 hours ago and now == ================================================= *** old.short Wed Mar 28 03:16:49 2007 --- new.short Wed Mar 28 03:23:23 2007 *************** *** 8,10 **** ! == 291 tests, 4 stderr failures, 2 stdout failures, 0 posttest failures == memcheck/tests/pointer-trace (stderr) --- 8,10 ---- ! == 291 tests, 4 stderr failures, 1 stdout failure, 0 posttest failures == memcheck/tests/pointer-trace (stderr) *************** *** 14,16 **** none/tests/mremap2 (stdout) - none/tests/pth_detached (stdout) --- 14,15 ---- |
|
From: Tom H. <th...@cy...> - 2007-03-28 02:19:10
|
Nightly build on lloyd ( x86_64, Fedora Core 3 ) started at 2007-03-28 03:05:06 BST Results unchanged from 24 hours ago Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 291 tests, 6 stderr failures, 1 stdout failure, 0 posttest failures == memcheck/tests/pointer-trace (stderr) memcheck/tests/stack_switch (stderr) memcheck/tests/x86/scalar (stderr) memcheck/tests/x86/scalar_supp (stderr) memcheck/tests/xml1 (stderr) none/tests/mremap (stderr) none/tests/mremap2 (stdout) |
|
From: Tom H. <th...@cy...> - 2007-03-28 02:14:40
|
Nightly build on gill ( x86_64, Fedora Core 2 ) started at 2007-03-28 03:00:02 BST Results unchanged from 24 hours ago Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 293 tests, 6 stderr failures, 1 stdout failure, 0 posttest failures == memcheck/tests/pointer-trace (stderr) memcheck/tests/stack_switch (stderr) memcheck/tests/x86/scalar (stderr) memcheck/tests/x86/scalar_supp (stderr) none/tests/fdleak_fcntl (stderr) none/tests/mremap (stderr) none/tests/mremap2 (stdout) |
|
From: <sv...@va...> - 2007-03-28 01:27:07
|
Author: njn
Date: 2007-03-28 02:27:05 +0100 (Wed, 28 Mar 2007)
New Revision: 6680
Log:
Remove duplicate code -- make XArray use VG_(ssort).
Had to change XArray's comparison function to return an Int rather than a
Word so it's consistent with the rest of the world.
Modified:
trunk/coregrind/m_debuginfo/readxcoff.c
trunk/coregrind/m_xarray.c
trunk/include/pub_tool_xarray.h
Modified: trunk/coregrind/m_debuginfo/readxcoff.c
===================================================================
--- trunk/coregrind/m_debuginfo/readxcoff.c 2007-03-27 23:40:46 UTC (rev 6679)
+++ trunk/coregrind/m_debuginfo/readxcoff.c 2007-03-28 01:27:05 UTC (rev 6680)
@@ -262,7 +262,7 @@
return False;
}
-static Word cmp_Names ( Name n1, Name n2 )
+static Int cmp_Names ( Name n1, Name n2 )
{
UInt i = 0;
while (1) {
@@ -384,7 +384,7 @@
}
/* Compare XCoffSyms by their start address. */
-static Word cmp_XCoffSym_by_start ( void* v1, void* v2 )
+static Int cmp_XCoffSym_by_start ( void* v1, void* v2 )
{
XCoffSym* s1 = (XCoffSym*)v1;
XCoffSym* s2 = (XCoffSym*)v2;
@@ -395,7 +395,7 @@
/* Compare XCoffSyms by a slightly weaker ordering, returning zero
(equivalence) for any overlap, and -1 or 1 otherwise. */
-static Word cmp_XCoffSym_by_overlap ( void* v1, void* v2 )
+static Int cmp_XCoffSym_by_overlap ( void* v1, void* v2 )
{
XCoffSym* s1 = (XCoffSym*)v1;
XCoffSym* s2 = (XCoffSym*)v2;
@@ -406,7 +406,7 @@
/* Compare XCoffSyms by their start address, and for equal addresses,
use the name as a secondary sort key. */
-static Word cmp_XCoffSym_by_start_then_name ( void* v1, void* v2 )
+static Int cmp_XCoffSym_by_start_then_name ( void* v1, void* v2 )
{
XCoffSym* s1 = (XCoffSym*)v1;
XCoffSym* s2 = (XCoffSym*)v2;
Modified: trunk/coregrind/m_xarray.c
===================================================================
--- trunk/coregrind/m_xarray.c 2007-03-27 23:40:46 UTC (rev 6679)
+++ trunk/coregrind/m_xarray.c 2007-03-28 01:27:05 UTC (rev 6680)
@@ -40,7 +40,7 @@
struct _XArray {
void* (*alloc) ( SizeT ); /* alloc fn (nofail) */
void (*free) ( void* ); /* free fn */
- Word (*cmpFn) ( void*, void* ); /* cmp fn (may be NULL) */
+ Int (*cmpFn) ( void*, void* ); /* cmp fn (may be NULL) */
Word elemSzB; /* element size in bytes */
void* arr; /* pointer to elements */
Word usedsizeE; /* # used elements in arr */
@@ -86,7 +86,7 @@
xa->free(xa);
}
-void VG_(setCmpFnXA) ( XArray* xao, Word (*compar)(void*,void*) )
+void VG_(setCmpFnXA) ( XArray* xao, Int (*compar)(void*,void*) )
{
struct _XArray* xa = (struct _XArray*)xao;
vg_assert(xa);
@@ -140,60 +140,12 @@
return xa->usedsizeE-1;
}
-// Generic shell sort. Like stdlib.h's qsort().
-static void ssort( void* base, Word nmemb, Word size,
- Word (*compar)(void*, void*) )
-{
- Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
- 9841, 29524, 88573, 265720,
- 797161, 2391484 };
- Int lo = 0;
- Int hi = nmemb-1;
- Int i, j, h, bigN, hp;
-
- bigN = hi - lo + 1; if (bigN < 2) return;
- hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
-
- #define SORT \
- for ( ; hp >= 0; hp--) { \
- h = incs[hp]; \
- for (i = lo + h; i <= hi; i++) { \
- ASSIGN(v,0, a,i); \
- j = i; \
- while (COMPAR(a,(j-h), v,0) > 0) { \
- ASSIGN(a,j, a,(j-h)); \
- j = j - h; \
- if (j <= (lo + h - 1)) break; \
- } \
- ASSIGN(a,j, v,0); \
- } \
- }
-
- // General case
- {
- char* a = base;
- char v[size]; // will be at least 'size' bytes
-
- #define ASSIGN(dst, dsti, src, srci) \
- VG_(memcpy)( &dst[size*(dsti)], &src[size*(srci)], size );
-
- #define COMPAR(dst, dsti, src, srci) \
- compar( &dst[size*(dsti)], &src[size*(srci)] )
-
- SORT;
-
- #undef ASSIGN
- #undef COMPAR
- }
- #undef SORT
-}
-
void VG_(sortXA) ( XArray* xao )
{
struct _XArray* xa = (struct _XArray*)xao;
vg_assert(xa);
vg_assert(xa->cmpFn);
- ssort( xa->arr, xa->usedsizeE, xa->elemSzB, xa->cmpFn );
+ VG_(ssort)( xa->arr, xa->usedsizeE, xa->elemSzB, xa->cmpFn );
xa->sorted = True;
}
Modified: trunk/include/pub_tool_xarray.h
===================================================================
--- trunk/include/pub_tool_xarray.h 2007-03-27 23:40:46 UTC (rev 6679)
+++ trunk/include/pub_tool_xarray.h 2007-03-28 01:27:05 UTC (rev 6680)
@@ -59,7 +59,7 @@
/* Set the comparison function for this XArray. This clears an
internal 'array is sorted' flag, which means you must call sortXA
before making further queries with lookupXA. */
-extern void VG_(setCmpFnXA) ( XArray*, Word (*compar)(void*,void*) );
+extern void VG_(setCmpFnXA) ( XArray*, Int (*compar)(void*,void*) );
/* Add an element to an XArray. Element is copied into the XArray.
Index at which it was added is returned. Note this will be
|
|
From: <js...@ac...> - 2007-03-28 00:16:23
|
Nightly build on g5 ( SuSE 10.1, ppc970 ) started at 2007-03-28 02:00:02 CEST Results unchanged from 24 hours ago Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 226 tests, 6 stderr failures, 2 stdout failures, 0 posttest failures == memcheck/tests/deep_templates (stdout) memcheck/tests/leak-cycle (stderr) memcheck/tests/leak-tree (stderr) memcheck/tests/pointer-trace (stderr) none/tests/faultstatus (stderr) none/tests/fdleak_cmsg (stderr) none/tests/mremap (stderr) none/tests/mremap2 (stdout) |