You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
1
(10) |
2
(8) |
3
(17) |
4
(28) |
5
(22) |
6
(8) |
|
7
(8) |
8
(22) |
9
(12) |
10
(17) |
11
(14) |
12
(15) |
13
(6) |
|
14
(9) |
15
(9) |
16
(16) |
17
(13) |
18
(18) |
19
(7) |
20
(5) |
|
21
(6) |
22
(5) |
23
(11) |
24
(5) |
25
(11) |
26
(7) |
27
(15) |
|
28
(11) |
29
(12) |
30
(12) |
31
(15) |
|
|
|
|
From: <sv...@va...> - 2007-10-12 21:55:28
|
Author: sewardj
Date: 2007-10-12 22:55:30 +0100 (Fri, 12 Oct 2007)
New Revision: 6991
Log:
Performance enhancements:
* use a 2-way set associative cache, instead of direct-mapped
* make the cache larger
* apply inlining
Modified:
branches/THRCHECK/thrcheck/tc_main.c
Modified: branches/THRCHECK/thrcheck/tc_main.c
===================================================================
--- branches/THRCHECK/thrcheck/tc_main.c 2007-10-12 20:27:33 UTC (rev 6990)
+++ branches/THRCHECK/thrcheck/tc_main.c 2007-10-12 21:55:30 UTC (rev 6991)
@@ -323,7 +323,7 @@
UInt dict[4]; /* can represent up to 4 diff values in the line */
UChar ix2s[N_LINE_W8s/4]; /* array of N_LINE_W8s 2-bit dict indexes */
/* if dict[0] == 0 then dict[1] is the index of the CacheLineF
- to use */
+ to use */
}
CacheLineZ; /* compressed rep for a cache line */
@@ -354,7 +354,7 @@
Each SecMap must hold a power-of-2 number of CacheLines. Hence
N_SECMAP_BITS must >= N_LINE_BITS.
*/
-#define N_SECMAP_BITS 12
+#define N_SECMAP_BITS 13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
// # CacheLines held by a SecMap
@@ -365,8 +365,8 @@
Bool mbHasLocks; /* hint: any locks in range? safe: True */
Bool mbHasShared; /* hint: any ShM/ShR states in range? safe: True */
CacheLineZ linesZ[N_SECMAP_ZLINES];
- CacheLineF* linezF;
- Int linezF_size;
+ CacheLineF* linesF;
+ Int linesF_size;
}
SecMap;
@@ -399,11 +399,11 @@
tl_assert(itr->line_no >= 0 && itr->line_no < N_SECMAP_ZLINES);
lineZ = &sm->linesZ[itr->line_no];
if (lineZ->dict[0] == 0) {
- tl_assert(sm->linezF);
- tl_assert(sm->linezF_size > 0);
+ tl_assert(sm->linesF);
+ tl_assert(sm->linesF_size > 0);
tl_assert(lineZ->dict[1] >= 0);
- tl_assert(lineZ->dict[1] < sm->linezF_size);
- lineF = &sm->linezF[ lineZ->dict[1] ];
+ tl_assert(lineZ->dict[1] < sm->linesF_size);
+ lineF = &sm->linesF[ lineZ->dict[1] ];
tl_assert(lineF->inUse);
tl_assert(itr->word_no >= 0 && itr->word_no < N_LINE_W8s);
*pVal = &lineF->w32s[itr->word_no];
@@ -444,8 +444,10 @@
with a bogus tag. */
typedef
struct {
- CacheLine way0 [N_WAY_NENT];
+ CacheLine lyns0[N_WAY_NENT];
+ CacheLine lyns1[N_WAY_NENT];
Addr tags0[N_WAY_NENT];
+ Addr tags1[N_WAY_NENT];
}
Cache;
@@ -1746,8 +1748,8 @@
for (j = 0; j < N_LINE_W8s/4; j++)
sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
}
- sm->linezF = NULL;
- sm->linezF_size = 0;
+ sm->linesF = NULL;
+ sm->linesF_size = 0;
stats__secmaps_allocd++;
stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
@@ -2148,15 +2150,32 @@
// check the cache
for (i = 0; i < N_WAY_NENT; i++) {
- CacheLine* cl = &cache_shmem.way0[i];
- Addr tag = cache_shmem.tags0[i];
- if (tag == 1)
- continue;
- if (!is_valid_scache_tag(tag)) BAD("14");
- if (!is_sane_CacheLine(cl)) BAD("15");
- if (tag & (N_LINE_W8s-1)) BAD("16");
- for (j = i+1; j < N_WAY_NENT; j++)
- if (cache_shmem.tags0[j] == tag) BAD("17");
+ CacheLine* cl;
+ Addr tag;
+ /* way0, dude */
+ cl = &cache_shmem.lyns0[i];
+ tag = cache_shmem.tags0[i];
+ if (tag != 1) {
+ if (!is_valid_scache_tag(tag)) BAD("14-0");
+ if (!is_sane_CacheLine(cl)) BAD("15-0");
+ if (tag & (N_LINE_W8s-1)) BAD("16-0");
+ for (j = i+1; j < N_WAY_NENT; j++)
+ if (cache_shmem.tags0[j] == tag) BAD("17-0");
+ }
+ /* way1 */
+ cl = &cache_shmem.lyns1[i];
+ tag = cache_shmem.tags1[i];
+ if (tag != 1) {
+ if (!is_valid_scache_tag(tag)) BAD("14-1");
+ if (!is_sane_CacheLine(cl)) BAD("15-1");
+ if (tag & (N_LINE_W8s-1)) BAD("16-1");
+ for (j = i+1; j < N_WAY_NENT; j++)
+ if (cache_shmem.tags1[j] == tag) BAD("17-1");
+ }
+ /* and also */
+ if (cache_shmem.tags0[i] != 1 && cache_shmem.tags1[i] != 1
+ && cache_shmem.tags0[i] == cache_shmem.tags1[i])
+ BAD("18");
}
return;
@@ -2637,12 +2656,12 @@
lineZ = &sm->linesZ[zix];
if (lineZ->dict[0] == 0) {
Int fix = lineZ->dict[1];
- tl_assert(sm->linezF);
- tl_assert(sm->linezF_size > 0);
- tl_assert(fix >= 0 && fix < sm->linezF_size);
+ tl_assert(sm->linesF);
+ tl_assert(sm->linesF_size > 0);
+ tl_assert(fix >= 0 && fix < sm->linesF_size);
*zp = NULL;
- *fp = &sm->linezF[fix];
- tl_assert(sm->linezF[fix].inUse);
+ *fp = &sm->linesF[fix];
+ tl_assert(sm->linesF[fix].inUse);
} else {
*zp = lineZ;
*fp = NULL;
@@ -2666,10 +2685,10 @@
lineF = NULL;
if (lineZ->dict[0] == 0) {
Word fix = lineZ->dict[1];
- tl_assert(sm->linezF);
- tl_assert(sm->linezF_size > 0);
- tl_assert(fix >= 0 && fix < sm->linezF_size);
- lineF = &sm->linezF[fix];
+ tl_assert(sm->linesF);
+ tl_assert(sm->linesF_size > 0);
+ tl_assert(fix >= 0 && fix < sm->linesF_size);
+ lineF = &sm->linesF[fix];
tl_assert(lineF->inUse);
lineZ = NULL;
}
@@ -2696,10 +2715,10 @@
/* If lineZ has an associated lineF, free it up. */
if (lineZ->dict[0] == 0) {
Word fix = lineZ->dict[1];
- tl_assert(sm->linezF);
- tl_assert(sm->linezF_size > 0);
- tl_assert(fix >= 0 && fix < sm->linezF_size);
- lineF = &sm->linezF[fix];
+ tl_assert(sm->linesF);
+ tl_assert(sm->linesF_size > 0);
+ tl_assert(fix >= 0 && fix < sm->linesF_size);
+ lineF = &sm->linesF[fix];
tl_assert(lineF->inUse);
lineF->inUse = False;
}
@@ -2714,15 +2733,15 @@
Word i, new_size;
CacheLineF* nyu;
- if (sm->linezF) {
- tl_assert(sm->linezF_size > 0);
+ if (sm->linesF) {
+ tl_assert(sm->linesF_size > 0);
} else {
- tl_assert(sm->linezF_size == 0);
+ tl_assert(sm->linesF_size == 0);
}
- if (sm->linezF) {
- for (i = 0; i < sm->linezF_size; i++) {
- if (!sm->linezF[i].inUse) {
+ if (sm->linesF) {
+ for (i = 0; i < sm->linesF_size; i++) {
+ if (!sm->linesF[i].inUse) {
*fixp = (Word)i;
return;
}
@@ -2730,35 +2749,35 @@
}
/* No free F line found. Expand existing array and try again. */
- new_size = sm->linezF_size==0 ? 1 : 2 * sm->linezF_size;
+ new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
nyu = tc_zalloc( new_size * sizeof(CacheLineF) );
tl_assert(nyu);
- stats__secmap_linesF_allocd += (new_size - sm->linezF_size);
- stats__secmap_linesF_bytes += (new_size - sm->linezF_size)
+ stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
+ stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
* sizeof(CacheLineF);
if (0)
VG_(printf)("SM %p: expand F array from %d to %d\n",
- sm, (Int)sm->linezF_size, new_size);
+ sm, (Int)sm->linesF_size, new_size);
for (i = 0; i < new_size; i++)
nyu[i].inUse = False;
- if (sm->linezF) {
- for (i = 0; i < sm->linezF_size; i++) {
- tl_assert(sm->linezF[i].inUse);
- nyu[i] = sm->linezF[i];
+ if (sm->linesF) {
+ for (i = 0; i < sm->linesF_size; i++) {
+ tl_assert(sm->linesF[i].inUse);
+ nyu[i] = sm->linesF[i];
}
- VG_(memset)(sm->linezF, 0, sm->linezF_size * sizeof(CacheLineF) );
- tc_free(sm->linezF);
+ VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(CacheLineF) );
+ tc_free(sm->linesF);
}
- sm->linezF = nyu;
- sm->linezF_size = new_size;
+ sm->linesF = nyu;
+ sm->linesF_size = new_size;
- for (i = 0; i < sm->linezF_size; i++) {
- if (!sm->linezF[i].inUse) {
+ for (i = 0; i < sm->linesF_size; i++) {
+ if (!sm->linesF[i].inUse) {
*fixp = (Word)i;
return;
}
@@ -2803,6 +2822,7 @@
return True;
}
+__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
Word i;
#define FMT "%08x\n"
@@ -2918,49 +2938,6 @@
/* Write the cacheline 'wix' to backing store. Where it ends up
is determined by its tag field. */
-static void analyse ( UInt* ws, Word nWs )
-{
-
-static Word qq=0;
- static Word qqx[20];
-
- Word i, j, nDiff;
-
- if (qq==0) { for (i = 0; i < 20; i++) qqx[i]=0; }
- qq++;
-
- nDiff = 1;
- for (i = 1; i < nWs; i++) {
- nDiff++;
- for (j = 0; j < i; j++) {
- if (ws[j] == ws[i])
- break;
- }
- if (j < i)
- nDiff--;
- }
-
- if (nDiff >= 19) nDiff=19;
- qqx[nDiff]++;
-
- if ((qq % 100000) == 0) {
- tl_assert(qqx[0] == 0);
- VG_(printf)("%lu ", qq);
- for (j = 1; j < 20; j++)
- VG_(printf)("%lu ", qqx[j]);
- VG_(printf)("\n");
- }
-
-#if 0
- if (nDiff >= 5) {
- VG_(printf)("diff %ld\n", nDiff);
- for (i = 0; i < nWs; i++)
- VG_(printf)("%x ", ws[i]);
- VG_(printf)("\n\n");
- }
-#endif
-}
-
static
Bool sequentialise_into ( /*OUT*/UInt* dst, Word nDst, CacheLine* src )
{
@@ -3039,7 +3016,7 @@
}
-static void cacheline_wback ( UWord wix )
+static void cacheline_wback ( UWord way, UWord wix )
{
Word i, j;
Bool anyShared = False;
@@ -3052,10 +3029,19 @@
UInt shvals[N_LINE_W8s];
UInt sv;
+ if (0)
+ VG_(printf)("scache wback way %d line %d\n", (Int)way, (Int)wix);
+
+ tl_assert(way >= 0 && way < 2);
tl_assert(wix >= 0 && wix < N_WAY_NENT);
- //VG_(printf)("scache wback line %d\n", wix);
- tag = cache_shmem.tags0[wix];
+ if (way == 0) {
+ tag = cache_shmem.tags0[wix];
+ cl = &cache_shmem.lyns0[wix];
+ } else {
+ tag = cache_shmem.tags1[wix];
+ cl = &cache_shmem.lyns1[wix];
+ }
/* The cache line may have been invalidated; if so, ignore it. */
if (!is_valid_scache_tag(tag))
@@ -3073,7 +3059,6 @@
lineZ = &sm->linesZ[zix];
/* Generate the data to be stored */
- cl = &cache_shmem.way0[wix];
tl_assert(is_sane_CacheLine( cl ));
anyShared = sequentialise_into( shvals, N_LINE_W8s, cl );
@@ -3105,10 +3090,10 @@
if (i < N_LINE_W8s) {
/* cannot use the compressed rep. Use f rep instead. */
alloc_F_for_writing( sm, &fix );
- tl_assert(sm->linezF);
- tl_assert(sm->linezF_size > 0);
- tl_assert(fix >= 0 && fix < sm->linezF_size);
- lineF = &sm->linezF[fix];
+ tl_assert(sm->linesF);
+ tl_assert(sm->linesF_size > 0);
+ tl_assert(fix >= 0 && fix < sm->linesF_size);
+ lineF = &sm->linesF[fix];
tl_assert(!lineF->inUse);
lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = 0;
lineZ->dict[1] = (UInt)fix;
@@ -3130,17 +3115,27 @@
associated with 'wix' is assumed to have already been filled in;
hence that is used to determine where in the backing store to read
from. */
-static void cacheline_fetch ( UWord wix )
+static void cacheline_fetch ( UWord way, UWord wix )
{
- Word i;
- Addr tag;
+ Word i;
+ Addr tag;
+ CacheLine* cl;
CacheLineZ* lineZ;
CacheLineF* lineF;
- //VG_(printf)("scache fetch line %d\n", wix);
+ if (0)
+ VG_(printf)("scache fetch way %d line %d\n", (Int)way, (Int)wix);
+
+ tl_assert(way >= 0 && way < 2);
tl_assert(wix >= 0 && wix < N_WAY_NENT);
- tag = cache_shmem.tags0[wix];
+ if (way == 0) {
+ tag = cache_shmem.tags0[wix];
+ cl = &cache_shmem.lyns0[wix];
+ } else {
+ tag = cache_shmem.tags1[wix];
+ cl = &cache_shmem.lyns1[wix];
+ }
/* reject nonsense requests */
tl_assert(is_valid_scache_tag(tag));
@@ -3155,7 +3150,7 @@
if (lineF) {
tl_assert(lineF->inUse);
for (i = 0; i < N_LINE_W8s; i++) {
- cache_shmem.way0[wix].w8[i] = lineF->w32s[i];
+ cl->w8[i] = lineF->w32s[i];
}
stats__cache_F_fetches++;
} else {
@@ -3165,18 +3160,19 @@
tl_assert(ix >= 0 && ix <= 3);
sv = lineZ->dict[ix];
tl_assert(sv != 0);
- cache_shmem.way0[wix].w8[i] = sv;
+ cl->w8[i] = sv;
}
stats__cache_Z_fetches++;
}
- cacheline_normalise( &cache_shmem.way0[wix] );
+ cacheline_normalise( cl );
}
static void shmem__flush_scache ( void ) {
Word wix;
if (0) VG_(printf)("scache flush\n");
for (wix = 0; wix < N_WAY_NENT; wix++) {
- cacheline_wback( wix );
+ cacheline_wback( 0, wix );
+ cacheline_wback( 1, wix );
}
stats__cache_flushes++;
}
@@ -3184,8 +3180,10 @@
Word wix;
if (0) VG_(printf)("scache inval\n");
tl_assert(!is_valid_scache_tag(1));
- for (wix = 0; wix < N_WAY_NENT; wix++)
+ for (wix = 0; wix < N_WAY_NENT; wix++) {
cache_shmem.tags0[wix] = 1/*INVALID*/;
+ cache_shmem.tags1[wix] = 1/*INVALID*/;
+ }
stats__cache_invals++;
}
@@ -3201,28 +3199,62 @@
static inline UWord get_cacheline_offset ( Addr a ) {
return (UWord)(a & (N_LINE_W8s - 1));
}
-static CacheLine* get_cacheline ( Addr a )
+
+static CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
+static inline CacheLine* get_cacheline ( Addr a )
{
/* tag is 'a' with the in-line offset masked out,
eg a[31]..a[4] 0000 */
- Addr tag = a & ~(N_LINE_W8s - 1);
- UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+ Addr tag = a & ~(N_LINE_W8s - 1);
+ UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
stats__cache_totrefs++;
+ /* Check both Ways */
if (LIKELY(tag == cache_shmem.tags0[wix]))
- return &cache_shmem.way0[wix];
+ return &cache_shmem.lyns0[wix];
+ if (LIKELY(tag == cache_shmem.tags1[wix]))
+ return &cache_shmem.lyns1[wix];
+ return get_cacheline_MISS( a );
+}
+
+static CacheLine* get_cacheline_MISS ( Addr a )
+{
+ /* tag is 'a' with the in-line offset masked out,
+ eg a[31]..a[4] 0000 */
+ static UWord seed = 0;
+
+ CacheLine* cl;
+ Addr* tag_old_p;
+ UWord way;
+ Addr tag = a & ~(N_LINE_W8s - 1);
+ UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
+
+ /* Check both Ways */
+ tl_assert(tag != cache_shmem.tags0[wix]);
+ tl_assert(tag != cache_shmem.tags1[wix]);
+
/* Dump the old line into the backing store. */
stats__cache_totmisses++;
- if (is_valid_scache_tag( cache_shmem.tags0[wix] )) {
- //if (!is_sane_CacheLine( &cache_shmem .way0[wix]))
- //pp_CacheLine( &cache_shmem .way0[wix]);
- tl_assert(is_sane_CacheLine( &cache_shmem .way0[wix] ));
- cacheline_wback( wix );
+
+ /* arbitrarily choose the way to dump (not very scientific) */
+ way = seed++ & 1;
+
+ if (way == 0) {
+ cl = &cache_shmem.lyns0[wix];
+ tag_old_p = &cache_shmem.tags0[wix];
+ } else {
+ cl = &cache_shmem.lyns1[wix];
+ tag_old_p = &cache_shmem.tags1[wix];
}
+
+ if (is_valid_scache_tag( *tag_old_p )) {
+ tl_assert(is_sane_CacheLine( cl ));
+ cacheline_wback( way, wix );
+ }
/* and reload the new one */
- cache_shmem.tags0[wix] = tag;
- cacheline_fetch( wix );
- tl_assert(is_sane_CacheLine( &cache_shmem. way0[wix] ));
- return &cache_shmem.way0[wix];
+ *tag_old_p = tag;
+ cacheline_fetch( way, wix );
+ tl_assert(is_sane_CacheLine( cl ));
+ return cl;
}
/////////////////////////////vvvvvvvvvvvvvvvvvvvvvvvvvvvvv
@@ -5220,6 +5252,7 @@
}
}
+__attribute__((unused))
static void laog__sanity_check ( void ) {
Word i, ws_size;
Word* ws_words;
@@ -6862,11 +6895,11 @@
stats__secmaps_allocd,
stats__secmap_ga_space_covered);
VG_(printf)(" linesZ: %10lu allocd (%10lu bytes occupied)\n",
- stats__secmap_linesZ_allocd,
- stats__secmap_linesZ_bytes);
+ stats__secmap_linesZ_allocd,
+ stats__secmap_linesZ_bytes);
VG_(printf)(" linesF: %10lu allocd (%10lu bytes occupied)\n",
- stats__secmap_linesF_allocd,
- stats__secmap_linesF_bytes);
+ stats__secmap_linesF_allocd,
+ stats__secmap_linesF_bytes);
VG_(printf)(" secmaps: %10lu iterator steppings\n",
stats__secmap_iterator_steppings);
@@ -6882,32 +6915,32 @@
VG_(printf)("\n");
VG_(printf)(" cline: %10lu normalises\n",
- stats__cline_normalises );
+ stats__cline_normalises );
VG_(printf)(" cline: reads 8/4/2/1: %10lu %10lu %10lu %10lu\n",
- stats__cline_read8s,
- stats__cline_read4s,
- stats__cline_read2s,
+ stats__cline_read8s,
+ stats__cline_read4s,
+ stats__cline_read2s,
stats__cline_read1s );
VG_(printf)(" cline: writes 8/4/2/1: %10lu %10lu %10lu %10lu\n",
- stats__cline_write8s,
- stats__cline_write4s,
- stats__cline_write2s,
+ stats__cline_write8s,
+ stats__cline_write4s,
+ stats__cline_write2s,
stats__cline_write1s );
VG_(printf)(" cline: sets 8/4/2/1: %10lu %10lu %10lu %10lu\n",
- stats__cline_set8s,
- stats__cline_set4s,
- stats__cline_set2s,
+ stats__cline_set8s,
+ stats__cline_set4s,
+ stats__cline_set2s,
stats__cline_set1s );
VG_(printf)(" cline: get1s %lu, copy1s %lu\n",
- stats__cline_get1s, stats__cline_copy1s );
+ stats__cline_get1s, stats__cline_copy1s );
VG_(printf)(" cline: splits: 8to4 %10lu, 4to2 %10lu, 2to1 %10lu\n",
- stats__cline_8to4splits,
- stats__cline_4to2splits,
+ stats__cline_8to4splits,
+ stats__cline_4to2splits,
stats__cline_2to1splits );
VG_(printf)(" cline: pulldowns: 8to4 %10lu, 4to2 %10lu, 2to1 %10lu\n",
- stats__cline_8to4pulldown,
- stats__cline_4to2pulldown,
+ stats__cline_8to4pulldown,
+ stats__cline_4to2pulldown,
stats__cline_2to1pulldown );
VG_(printf)("\n");
}
|
|
From: <sv...@va...> - 2007-10-12 20:27:34
|
Author: sewardj Date: 2007-10-12 21:27:33 +0100 (Fri, 12 Oct 2007) New Revision: 6990 Log: Implement compressed shadow memory. Use it to support shadow values at byte granularity instead of 32-bit granularity. Net effect is to allow each byte to be shadowed by a 32-bit value whilst using about 5 bits/byte. Intent is to provide byte-level lockset tracking, whilst keeping memory consumption reasonable. Initial experiments suggest memory consumption is similar to Memcheck, although that can't really be true in the limit case (Memcheck = 2 bits/byte vs 4 bits/byte here). Unfortunately: * the implementation of shadow memory is scarily complex * this slows the tool down massively, by a factor of about 2.5x compared to the situation before this commit. Currently under investigation. Modified: branches/THRCHECK/thrcheck/tc_main.c [... diff too large to include ...] |
|
From: Rich C. <Ric...@me...> - 2007-10-12 20:02:39
|
I'd like to volunteer to maintain Omega. Rich On Thu, 27 Sep 2007 12:10:31 +1000 (EST) Nicholas Nethercote <nj...@cs...> wrote: > Greetings, > > We have some bad news. Bryan Meredith, who wrote the leak-checking tool > Omega, died earlier this year in a motorcycle accident. We were informed of > this recently by one of Bryan's colleagues. > > We have been informed that Bryan's widow and colleagues would be happy if > somebody took over Omega and maintained it, so that his work would not be > lost. > > If anybody is interested in taking over maintenance and development of > Omega, please let us know. I believe that Omega's code is not as robust as > it could be, but that the tool is already useful to some people, and is > worthy of maintenance, further investigation and development. > > Nick > > > ------------------------------------------------------------------------- > This SF.net email is sponsored by: Microsoft > Defy all challenges. Microsoft(R) Visual Studio 2005. > http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ > _______________________________________________ > Valgrind-developers mailing list > Val...@li... > https://lists.sourceforge.net/lists/listinfo/valgrind-developers -- Rich Coe ric...@me... Virtual Principle Engineer General Electric Healthcare Technologies Global Software Platforms, Computer Technology Team |
|
From: <js...@ac...> - 2007-10-12 14:43:02
|
Nightly build on minnie ( SuSE 10.0, ppc32 ) started at 2007-10-12 09:00:02 BST Results unchanged from 24 hours ago Checking out valgrind source tree ... done Configuring valgrind ... done Building valgrind ... done Running regression tests ... failed Regression test results follow == 220 tests, 10 stderr failures, 6 stdout failures, 0 posttest failures == memcheck/tests/leak-tree (stderr) memcheck/tests/leakotron (stdout) memcheck/tests/pointer-trace (stderr) memcheck/tests/stack_changes (stderr) memcheck/tests/xml1 (stderr) none/tests/faultstatus (stderr) none/tests/fdleak_cmsg (stderr) none/tests/mremap (stderr) none/tests/mremap2 (stdout) none/tests/ppc32/jm-fp (stdout) none/tests/ppc32/jm-fp (stderr) none/tests/ppc32/round (stdout) none/tests/ppc32/round (stderr) none/tests/ppc32/test_fx (stdout) none/tests/ppc32/test_fx (stderr) none/tests/ppc32/test_gx (stdout) |
|
From: <sv...@va...> - 2007-10-12 06:44:19
|
Author: njn
Date: 2007-10-12 07:44:19 +0100 (Fri, 12 Oct 2007)
New Revision: 6989
Log:
Remove the misleading "curr_" prefix from various names.
Modified:
branches/MASSIF2/massif/ms_main.c
Modified: branches/MASSIF2/massif/ms_main.c
===================================================================
--- branches/MASSIF2/massif/ms_main.c 2007-10-12 05:12:48 UTC (rev 6988)
+++ branches/MASSIF2/massif/ms_main.c 2007-10-12 06:44:19 UTC (rev 6989)
@@ -474,7 +474,7 @@
// Bottom-XPts: space for the precise context.
// Other XPts: space of all the descendent bottom-XPts.
// Nb: this value goes up and down as the program executes.
- SizeT curr_szB;
+ SizeT szB;
XPt* parent; // pointer to parent XPt
@@ -520,7 +520,7 @@
VG_(printf)("XPt (%p):\n", xpt);
VG_(printf)("- ip: : %p\n", (void*)xpt->ip);
- VG_(printf)("- curr_szB : %ld\n", xpt->curr_szB);
+ VG_(printf)("- szB : %ld\n", xpt->szB);
VG_(printf)("- parent : %p\n", xpt->parent);
VG_(printf)("- n_children : %d\n", xpt->n_children);
VG_(printf)("- max_children: %d\n", xpt->max_children);
@@ -536,7 +536,7 @@
// that needs to be resizable.
XPt* xpt = perm_malloc(sizeof(XPt));
xpt->ip = ip;
- xpt->curr_szB = 0;
+ xpt->szB = 0;
xpt->parent = parent;
// We don't initially allocate any space for children. We let that
@@ -574,28 +574,28 @@
}
// Reverse comparison for a reverse sort -- biggest to smallest.
-static Int XPt_revcmp_curr_szB(void* n1, void* n2)
+static Int XPt_revcmp_szB(void* n1, void* n2)
{
XPt* xpt1 = *(XPt**)n1;
XPt* xpt2 = *(XPt**)n2;
- return ( xpt1->curr_szB < xpt2->curr_szB ? 1
- : xpt1->curr_szB > xpt2->curr_szB ? -1
- : 0);
+ return ( xpt1->szB < xpt2->szB ? 1
+ : xpt1->szB > xpt2->szB ? -1
+ : 0);
}
// Does the xpt account for >= 1% (or so) of total memory used?
-static Bool is_significant_XPt(XPt* xpt, SizeT curr_total_szB)
+static Bool is_significant_XPt(XPt* xpt, SizeT total_szB)
{
// clo_threshold is measured in hundredths of a percent of total size,
// ie. 10,000ths of total size. So clo_threshold=100 means that the
- // threshold is 1% of total size. If curr_total_szB is zero, we consider
+ // threshold is 1% of total size. If total_szB is zero, we consider
// every XPt significant. We also always consider the alloc_xpt to be
// significant.
- tl_assert(xpt->curr_szB <= curr_total_szB);
+ tl_assert(xpt->szB <= total_szB);
return xpt == alloc_xpt || 0 == clo_threshold ||
- (0 != curr_total_szB &&
+ (0 != total_szB &&
// Nb: 10000 is a ULong to avoid possible overflow problems.
- xpt->curr_szB * 10000ULL / curr_total_szB >= clo_threshold);
+ xpt->szB * 10000ULL / total_szB >= clo_threshold);
}
@@ -607,9 +607,9 @@
{
Int i;
XPt* dup_xpt = VG_(malloc)(sizeof(XPt));
- dup_xpt->ip = xpt->ip;
- dup_xpt->curr_szB = xpt->curr_szB;
- dup_xpt->parent = parent; // Nb: not xpt->children!
+ dup_xpt->ip = xpt->ip;
+ dup_xpt->szB = xpt->szB;
+ dup_xpt->parent = parent; // Nb: not xpt->children!
// If this node is not significant, there's no point duplicating its
// children. And not doing so can make a huge difference, eg.
// it speeds up massif/perf/many-xpts by over 10x.
@@ -675,9 +675,9 @@
SizeT children_sum_szB = 0;
for (i = 0; i < xpt->n_children; i++) {
sanity_check_XTree(xpt->children[i], xpt);
- children_sum_szB += xpt->children[i]->curr_szB;
+ children_sum_szB += xpt->children[i]->szB;
}
- tl_assert(children_sum_szB == xpt->curr_szB);
+ tl_assert(children_sum_szB == xpt->szB);
}
}
@@ -867,7 +867,7 @@
return xpt;
}
-// Update 'curr_szB' of every XPt in the XCon, by percolating upwards.
+// Update 'szB' of every XPt in the XCon, by percolating upwards.
static void update_XCon(XPt* xpt, SSizeT space_delta)
{
tl_assert(True == clo_heap);
@@ -878,12 +878,12 @@
return;
while (xpt != alloc_xpt) {
- if (space_delta < 0) tl_assert(xpt->curr_szB >= -space_delta);
- xpt->curr_szB += space_delta;
+ if (space_delta < 0) tl_assert(xpt->szB >= -space_delta);
+ xpt->szB += space_delta;
xpt = xpt->parent;
}
- if (space_delta < 0) tl_assert(alloc_xpt->curr_szB >= -space_delta);
- alloc_xpt->curr_szB += space_delta;
+ if (space_delta < 0) tl_assert(alloc_xpt->szB >= -space_delta);
+ alloc_xpt->szB += space_delta;
}
@@ -1204,7 +1204,7 @@
// XXX: total_szB computed in various places -- factor it out
SizeT total_szB = heap_szB + clo_heap_admin*n_heap_blocks + stacks_szB;
snapshot->alloc_xpt = dup_XTree(alloc_xpt, /*parent*/NULL, total_szB);
- tl_assert(snapshot->alloc_xpt->curr_szB == heap_szB);
+ tl_assert(snapshot->alloc_xpt->szB == heap_szB);
}
snapshot->heap_admin_szB = clo_heap_admin * n_heap_blocks;
}
@@ -1740,7 +1740,7 @@
static void pp_snapshot_XPt(Int fd, XPt* xpt, Int depth, Char* depth_str,
Int depth_str_len,
- SizeT curr_heap_szB, SizeT curr_total_szB)
+ SizeT snapshot_heap_szB, SizeT snapshot_total_szB)
{
#define BUF_LEN 1024
Int i;
@@ -1752,15 +1752,17 @@
Int n_insig_children;
Int n_child_entries;
- // Sort XPt's children by curr_szB (reverse order: biggest to smallest)
+ // Sort XPt's children by szB (reverse order: biggest to smallest)
VG_(ssort)(xpt->children, xpt->n_children, sizeof(XPt*),
- XPt_revcmp_curr_szB);
+ XPt_revcmp_szB);
// How many children are significant? Also calculate the number of child
// entries to print -- there may be a need for an "in N places" line.
n_sig_children = 0;
while (n_sig_children < xpt->n_children &&
- is_significant_XPt(xpt->children[n_sig_children], curr_total_szB)) {
+ is_significant_XPt(xpt->children[n_sig_children],
+ snapshot_total_szB))
+ {
n_sig_children++;
}
n_insig_children = xpt->n_children - n_sig_children;
@@ -1773,8 +1775,8 @@
} else {
ip_desc = VG_(describe_IP)(xpt->ip-1, ip_desc, BUF_LEN);
}
- perc = make_perc(xpt->curr_szB, curr_total_szB);
- FP("%sn%d: %lu %s\n", depth_str, n_child_entries, xpt->curr_szB, ip_desc);
+ perc = make_perc(xpt->szB, snapshot_total_szB);
+ FP("%sn%d: %lu %s\n", depth_str, n_child_entries, xpt->szB, ip_desc);
// Indent.
tl_assert(depth+1 < depth_str_len-1); // -1 for end NUL char
@@ -1785,15 +1787,15 @@
for (i = 0; i < n_sig_children; i++) {
XPt* child = xpt->children[i];
pp_snapshot_XPt(fd, child, depth+1, depth_str, depth_str_len,
- curr_heap_szB, curr_total_szB);
- printed_children_szB += child->curr_szB;
+ snapshot_heap_szB, snapshot_total_szB);
+ printed_children_szB += child->szB;
}
// Print the extra "in N places" line, if any children were insignificant.
if (n_insig_children > 0) {
Char* s = ( n_insig_children == 1 ? "," : "s, all" );
- SizeT total_insig_children_szB = xpt->curr_szB - printed_children_szB;
- perc = make_perc(total_insig_children_szB, curr_total_szB);
+ SizeT total_insig_children_szB = xpt->szB - printed_children_szB;
+ perc = make_perc(total_insig_children_szB, snapshot_total_szB);
FP("%sn0: %lu in %d place%s below massif's threshold (%s)\n",
depth_str, total_insig_children_szB, n_insig_children, s,
make_perc(clo_threshold, 10000));
|
|
From: <sv...@va...> - 2007-10-12 05:12:46
|
Author: njn
Date: 2007-10-12 06:12:48 +0100 (Fri, 12 Oct 2007)
New Revision: 6988
Log:
minor tweaks
Modified:
branches/MASSIF2/massif/ms_main.c
Modified: branches/MASSIF2/massif/ms_main.c
===================================================================
--- branches/MASSIF2/massif/ms_main.c 2007-10-12 03:37:58 UTC (rev 6987)
+++ branches/MASSIF2/massif/ms_main.c 2007-10-12 05:12:48 UTC (rev 6988)
@@ -653,10 +653,9 @@
n_dupd_xpts_freed++;
}
-// Sanity checking: we check snapshot XTrees when they are taken, deleted
-// and printed. We periodically check the main heap XTree with
-// ms_expensive_sanity_check.
-//
+// Sanity checking: we check snapshot XTrees after they are taken, before
+// they are deleted, and before they are printed. We also periodically
+// check the main heap XTree with ms_expensive_sanity_check.
static void sanity_check_XTree(XPt* xpt, XPt* parent)
{
Int i;
@@ -670,19 +669,16 @@
// Check children counts look sane.
tl_assert(xpt->n_children <= xpt->max_children);
- // Check the sum of any children szBs equals the XPt's szB.
+ // Check the sum of any children szBs equals the XPt's szB. Check the
+ // children at the same time.
if (xpt->n_children > 0) {
SizeT children_sum_szB = 0;
for (i = 0; i < xpt->n_children; i++) {
+ sanity_check_XTree(xpt->children[i], xpt);
children_sum_szB += xpt->children[i]->curr_szB;
}
tl_assert(children_sum_szB == xpt->curr_szB);
}
-
- // Check each child.
- for (i = 0; i < xpt->n_children; i++) {
- sanity_check_XTree(xpt->children[i], xpt);
- }
}
|
|
From: <sv...@va...> - 2007-10-12 03:37:58
|
Author: njn
Date: 2007-10-12 04:37:58 +0100 (Fri, 12 Oct 2007)
New Revision: 6987
Log:
update expected outputs
Modified:
branches/MASSIF2/massif/tests/deep-A.post.exp
branches/MASSIF2/massif/tests/deep-B.post.exp
branches/MASSIF2/massif/tests/deep-C.post.exp
Modified: branches/MASSIF2/massif/tests/deep-A.post.exp
===================================================================
--- branches/MASSIF2/massif/tests/deep-A.post.exp 2007-10-12 02:56:03 UTC (rev 6986)
+++ branches/MASSIF2/massif/tests/deep-A.post.exp 2007-10-12 03:37:58 UTC (rev 6987)
@@ -45,14 +45,14 @@
8 864 864 800 64 0
9 972 972 900 72 0
92.59% (900B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
-->92.59% (900B) 0x804838D: a12 (deep.c:18)
- ->92.59% (900B) 0x80483A3: a11 (deep.c:19)
- ->92.59% (900B) 0x80483B9: a10 (deep.c:20)
- ->92.59% (900B) 0x80483CF: a9 (deep.c:21)
- ->92.59% (900B) 0x80483E5: a8 (deep.c:22)
- ->92.59% (900B) 0x80483FB: a7 (deep.c:23)
- ->92.59% (900B) 0x8048411: a6 (deep.c:24)
- ->92.59% (900B) 0x8048427: a5 (deep.c:25)
+->92.59% (900B) 0x804838D: a12 (deep.c:16)
+ ->92.59% (900B) 0x80483A3: a11 (deep.c:17)
+ ->92.59% (900B) 0x80483B9: a10 (deep.c:18)
+ ->92.59% (900B) 0x80483CF: a9 (deep.c:19)
+ ->92.59% (900B) 0x80483E5: a8 (deep.c:20)
+ ->92.59% (900B) 0x80483FB: a7 (deep.c:21)
+ ->92.59% (900B) 0x8048411: a6 (deep.c:22)
+ ->92.59% (900B) 0x8048427: a5 (deep.c:23)
--------------------------------------------------------------------------------
n time(B) total(B) useful-heap(B) admin-heap(B) stacks(B)
Modified: branches/MASSIF2/massif/tests/deep-B.post.exp
===================================================================
--- branches/MASSIF2/massif/tests/deep-B.post.exp 2007-10-12 02:56:03 UTC (rev 6986)
+++ branches/MASSIF2/massif/tests/deep-B.post.exp 2007-10-12 03:37:58 UTC (rev 6987)
@@ -45,12 +45,12 @@
8 864 864 800 64 0
9 972 972 900 72 0
92.59% (900B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
-->92.59% (900B) 0x8048427: a5 (deep.c:25)
- ->92.59% (900B) 0x804843D: a4 (deep.c:26)
- ->92.59% (900B) 0x8048453: a3 (deep.c:27)
- ->92.59% (900B) 0x8048469: a2 (deep.c:28)
- ->92.59% (900B) 0x804847F: a1 (deep.c:29)
- ->92.59% (900B) 0x80484B3: main (deep.c:37)
+->92.59% (900B) 0x8048427: a5 (deep.c:23)
+ ->92.59% (900B) 0x804843D: a4 (deep.c:24)
+ ->92.59% (900B) 0x8048453: a3 (deep.c:25)
+ ->92.59% (900B) 0x8048469: a2 (deep.c:26)
+ ->92.59% (900B) 0x804847F: a1 (deep.c:27)
+ ->92.59% (900B) 0x80484B3: main (deep.c:35)
--------------------------------------------------------------------------------
n time(B) total(B) useful-heap(B) admin-heap(B) stacks(B)
Modified: branches/MASSIF2/massif/tests/deep-C.post.exp
===================================================================
--- branches/MASSIF2/massif/tests/deep-C.post.exp 2007-10-12 02:56:03 UTC (rev 6986)
+++ branches/MASSIF2/massif/tests/deep-C.post.exp 2007-10-12 03:37:58 UTC (rev 6987)
@@ -45,9 +45,9 @@
8 864 864 800 64 0
9 972 972 900 72 0
92.59% (900B) (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
-->92.59% (900B) 0x8048469: a2 (deep.c:28)
- ->92.59% (900B) 0x804847F: a1 (deep.c:29)
- ->92.59% (900B) 0x80484B3: main (deep.c:37)
+->92.59% (900B) 0x8048469: a2 (deep.c:26)
+ ->92.59% (900B) 0x804847F: a1 (deep.c:27)
+ ->92.59% (900B) 0x80484B3: main (deep.c:35)
--------------------------------------------------------------------------------
n time(B) total(B) useful-heap(B) admin-heap(B) stacks(B)
|
|
From: <sv...@va...> - 2007-10-12 02:56:04
|
Author: njn
Date: 2007-10-12 03:56:03 +0100 (Fri, 12 Oct 2007)
New Revision: 6986
Log:
wibble
Modified:
branches/MASSIF2/massif/perf/many-xpts.vgperf
Modified: branches/MASSIF2/massif/perf/many-xpts.vgperf
===================================================================
--- branches/MASSIF2/massif/perf/many-xpts.vgperf 2007-10-12 02:47:11 UTC (rev 6985)
+++ branches/MASSIF2/massif/perf/many-xpts.vgperf 2007-10-12 02:56:03 UTC (rev 6986)
@@ -1,2 +1,2 @@
prog: many-xpts
-vgopts: --tool=massif --time-unit=B --depth=100
+vgopts: --time-unit=B --depth=100
|
|
From: <sv...@va...> - 2007-10-12 02:47:10
|
Author: njn
Date: 2007-10-12 03:47:11 +0100 (Fri, 12 Oct 2007)
New Revision: 6985
Log:
minor comment changes
Modified:
branches/MASSIF2/massif/ms_main.c
Modified: branches/MASSIF2/massif/ms_main.c
===================================================================
--- branches/MASSIF2/massif/ms_main.c 2007-10-12 02:33:12 UTC (rev 6984)
+++ branches/MASSIF2/massif/ms_main.c 2007-10-12 02:47:11 UTC (rev 6985)
@@ -55,7 +55,7 @@
// many-xpts 0.05s ma:23.5s (470.6x, -----)
//
// Don't dup children of insignificant XPts in dup_XTree. Made many-xpts
-// more than 10x faster.
+// more than 10x faster (r6984):
// heap 0.59s ma:20.3s (34.5x, -----)
// tinycc 0.49s ma: 7.6s (15.4x, -----)
// many-xpts 0.04s ma: 1.9s (46.2x, -----)
@@ -843,8 +843,7 @@
// After this call, the IPs we want are in ips[0]..ips[n_ips-1].
Int n_ips = get_IPs(tid, is_custom_alloc, ips);
- // Now do the search/insertion of the XCon. 'L' is the loop counter,
- // being the index into ips[].
+ // Now do the search/insertion of the XCon.
for (i = 0; i < n_ips; i++) {
Addr ip = ips[i];
Int ch;
|
|
From: <sv...@va...> - 2007-10-12 02:33:13
|
Author: njn
Date: 2007-10-12 03:33:12 +0100 (Fri, 12 Oct 2007)
New Revision: 6984
Log:
Don't dup children of insignificant XPts in dup_XTree. Made many-xpts
more than 10x faster.
Modified:
branches/MASSIF2/massif/ms_main.c
Modified: branches/MASSIF2/massif/ms_main.c
===================================================================
--- branches/MASSIF2/massif/ms_main.c 2007-10-11 08:46:56 UTC (rev 6983)
+++ branches/MASSIF2/massif/ms_main.c 2007-10-12 02:33:12 UTC (rev 6984)
@@ -54,6 +54,12 @@
// tinycc 0.45s ma: 7.4s (16.4x, -----)
// many-xpts 0.05s ma:23.5s (470.6x, -----)
//
+// Don't dup children of insignificant XPts in dup_XTree. Made many-xpts
+// more than 10x faster.
+// heap 0.59s ma:20.3s (34.5x, -----)
+// tinycc 0.49s ma: 7.6s (15.4x, -----)
+// many-xpts 0.04s ma: 1.9s (46.2x, -----)
+//
// Todo:
// - do snapshots on client requests
// - C++ tests -- for each of the allocators, and overloaded versions of
@@ -577,30 +583,55 @@
: 0);
}
+// Does the xpt account for >= 1% (or so) of total memory used?
+static Bool is_significant_XPt(XPt* xpt, SizeT curr_total_szB)
+{
+ // clo_threshold is measured in hundredths of a percent of total size,
+ // ie. 10,000ths of total size. So clo_threshold=100 means that the
+ // threshold is 1% of total size. If curr_total_szB is zero, we consider
+ // every XPt significant. We also always consider the alloc_xpt to be
+ // significant.
+ tl_assert(xpt->curr_szB <= curr_total_szB);
+ return xpt == alloc_xpt || 0 == clo_threshold ||
+ (0 != curr_total_szB &&
+ // Nb: 10000 is a ULong to avoid possible overflow problems.
+ xpt->curr_szB * 10000ULL / curr_total_szB >= clo_threshold);
+}
+
//------------------------------------------------------------//
//--- XTree Operations ---//
//------------------------------------------------------------//
-// XXX: taking a full snapshot... could/should just snapshot the significant
-// parts. Nb: then the amounts wouldn't add up, unless I represented the
-// "insignificant places" in XPts. Might be worthwhile -- there can
-// be a lot of zero nodes in the XTree... (simpler: ignore all zero nodes
-// unless threshold=0?)
-static XPt* dup_XTree(XPt* xpt, XPt* parent)
+static XPt* dup_XTree(XPt* xpt, XPt* parent, SizeT total_szB)
{
Int i;
XPt* dup_xpt = VG_(malloc)(sizeof(XPt));
dup_xpt->ip = xpt->ip;
dup_xpt->curr_szB = xpt->curr_szB;
dup_xpt->parent = parent; // Nb: not xpt->children!
- dup_xpt->n_children = xpt->n_children;
- dup_xpt->max_children = xpt->n_children; // Nb: don't copy max_children!
- dup_xpt->children = VG_(malloc)(dup_xpt->max_children * sizeof(XPt*));
- for (i = 0; i < xpt->n_children; i++) {
- dup_xpt->children[i] = dup_XTree(xpt->children[i], dup_xpt);
+ // If this node is not significant, there's no point duplicating its
+ // children. And not doing so can make a huge difference, eg.
+ // it speeds up massif/perf/many-xpts by over 10x.
+ if (!is_significant_XPt(xpt, total_szB)) {
+ dup_xpt->n_children = 0;
+ dup_xpt->max_children = 0;
+ dup_xpt->children = NULL;
+ } else {
+ dup_xpt->n_children = xpt->n_children;
+ dup_xpt->max_children = xpt->max_children;
+ // We copy n_children children (not max_children). If n_children==0,
+ // don't bother allocating an 'children' array in the dup.
+ if (xpt->n_children > 0) {
+ dup_xpt->children = VG_(malloc)(dup_xpt->n_children * sizeof(XPt*));
+ for (i = 0; i < xpt->n_children; i++) {
+ dup_xpt->children[i] =
+ dup_XTree(xpt->children[i], dup_xpt, total_szB);
+ }
+ } else {
+ dup_xpt->children = NULL;
+ }
}
-
n_dupd_xpts++;
return dup_xpt;
@@ -1175,7 +1206,9 @@
if (clo_heap) {
snapshot->heap_szB = heap_szB;
if (is_detailed) {
- snapshot->alloc_xpt = dup_XTree(alloc_xpt, /*parent*/NULL);
+ // XXX: total_szB computed in various places -- factor it out
+ SizeT total_szB = heap_szB + clo_heap_admin*n_heap_blocks + stacks_szB;
+ snapshot->alloc_xpt = dup_XTree(alloc_xpt, /*parent*/NULL, total_szB);
tl_assert(snapshot->alloc_xpt->curr_szB == heap_szB);
}
snapshot->heap_admin_szB = clo_heap_admin * n_heap_blocks;
@@ -1710,21 +1743,6 @@
return mbuf;
}
-// Does the xpt account for >= 1% (or so) of total memory used?
-static Bool is_significant_XPt(XPt* xpt, SizeT curr_total_szB)
-{
- // clo_threshold is measured in hundredths of a percent of total size,
- // ie. 10,000ths of total size. So clo_threshold=100 means that the
- // threshold is 1% of total size. If curr_total_szB is zero, we consider
- // every XPt significant. We also always consider the alloc_xpt to be
- // significant.
- tl_assert(xpt->curr_szB <= curr_total_szB);
- return xpt == alloc_xpt || 0 == clo_threshold ||
- (0 != curr_total_szB &&
- // Nb: 10000 is a ULong to avoid possible overflow problems.
- xpt->curr_szB * 10000ULL / curr_total_szB >= clo_threshold);
-}
-
static void pp_snapshot_XPt(Int fd, XPt* xpt, Int depth, Char* depth_str,
Int depth_str_len,
SizeT curr_heap_szB, SizeT curr_total_szB)
|
|
From: Tom H. <th...@cy...> - 2007-10-12 02:31:18
|
Nightly build on alvis ( i686, Red Hat 7.3 ) started at 2007-10-12 03:15:02 BST
Results unchanged from 24 hours ago
Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed
Regression test results follow
== 260 tests, 27 stderr failures, 1 stdout failure, 0 posttest failures ==
memcheck/tests/addressable (stderr)
memcheck/tests/badjump (stderr)
memcheck/tests/describe-block (stderr)
memcheck/tests/erringfds (stderr)
memcheck/tests/leak-0 (stderr)
memcheck/tests/leak-cycle (stderr)
memcheck/tests/leak-pool-0 (stderr)
memcheck/tests/leak-pool-1 (stderr)
memcheck/tests/leak-pool-2 (stderr)
memcheck/tests/leak-pool-3 (stderr)
memcheck/tests/leak-pool-4 (stderr)
memcheck/tests/leak-pool-5 (stderr)
memcheck/tests/leak-regroot (stderr)
memcheck/tests/leak-tree (stderr)
memcheck/tests/long_namespace_xml (stderr)
memcheck/tests/match-overrun (stderr)
memcheck/tests/partial_load_dflt (stderr)
memcheck/tests/partial_load_ok (stderr)
memcheck/tests/partiallydefinedeq (stderr)
memcheck/tests/pointer-trace (stderr)
memcheck/tests/sigkill (stderr)
memcheck/tests/stack_changes (stderr)
memcheck/tests/x86/scalar (stderr)
memcheck/tests/x86/scalar_supp (stderr)
memcheck/tests/x86/xor-undef-x86 (stderr)
memcheck/tests/xml1 (stderr)
none/tests/mremap (stderr)
none/tests/mremap2 (stdout)
|
|
From: Tom H. <th...@cy...> - 2007-10-12 02:24:12
|
Nightly build on lloyd ( x86_64, Fedora 7 ) started at 2007-10-12 03:05:06 BST
Results unchanged from 24 hours ago
Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed
Regression test results follow
== 293 tests, 4 stderr failures, 2 stdout failures, 0 posttest failures ==
memcheck/tests/pointer-trace (stderr)
memcheck/tests/vcpu_fnfns (stdout)
memcheck/tests/x86/scalar (stderr)
memcheck/tests/xml1 (stderr)
none/tests/mremap (stderr)
none/tests/mremap2 (stdout)
|
|
From: Tom H. <th...@cy...> - 2007-10-12 02:24:05
|
Nightly build on dellow ( x86_64, Fedora 7 ) started at 2007-10-12 03:10:04 BST
Results unchanged from 24 hours ago
Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed
Regression test results follow
== 293 tests, 4 stderr failures, 3 stdout failures, 0 posttest failures ==
memcheck/tests/pointer-trace (stderr)
memcheck/tests/vcpu_fnfns (stdout)
memcheck/tests/x86/scalar (stderr)
memcheck/tests/xml1 (stderr)
none/tests/mremap (stderr)
none/tests/mremap2 (stdout)
none/tests/pth_detached (stdout)
|
|
From: Tom H. <th...@cy...> - 2007-10-12 02:11:32
|
Nightly build on gill ( x86_64, Fedora Core 2 ) started at 2007-10-12 03:00:03 BST
Results unchanged from 24 hours ago
Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed
Regression test results follow
== 295 tests, 6 stderr failures, 1 stdout failure, 0 posttest failures ==
memcheck/tests/pointer-trace (stderr)
memcheck/tests/stack_switch (stderr)
memcheck/tests/x86/scalar (stderr)
memcheck/tests/x86/scalar_supp (stderr)
none/tests/fdleak_fcntl (stderr)
none/tests/mremap (stderr)
none/tests/mremap2 (stdout)
|
|
From: <js...@ac...> - 2007-10-12 00:41:41
|
Nightly build on g5 ( SuSE 10.1, ppc970 ) started at 2007-10-12 02:00:01 CEST
Results unchanged from 24 hours ago
Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed
Regression test results follow
== 228 tests, 6 stderr failures, 2 stdout failures, 0 posttest failures ==
memcheck/tests/deep_templates (stdout)
memcheck/tests/leak-cycle (stderr)
memcheck/tests/leak-tree (stderr)
memcheck/tests/pointer-trace (stderr)
none/tests/faultstatus (stderr)
none/tests/fdleak_cmsg (stderr)
none/tests/mremap (stderr)
none/tests/mremap2 (stdout)
|