From: <sv...@va...> - 2005-12-26 21:01:35
|
Author: njn
Date: 2005-12-26 21:01:31 +0000 (Mon, 26 Dec 2005)
New Revision: 5443
Log:
Remove out-of-date profile events list.
Modified:
branches/COMPVBITS/memcheck/mc_main.c
Modified: branches/COMPVBITS/memcheck/mc_main.c
===================================================================
--- branches/COMPVBITS/memcheck/mc_main.c 2005-12-26 19:29:33 UTC (rev 5442)
+++ branches/COMPVBITS/memcheck/mc_main.c 2005-12-26 21:01:31 UTC (rev 5443)
@@ -3846,102 +3846,9 @@
/*--- Crude profiling machinery. ---*/
/*------------------------------------------------------------*/

-/* Event index. If just the name of the fn is given, this means the
- number of calls to the fn. Otherwise it is the specified event.
- Ones marked 'M' are MemCheck only. Ones marked 'A' are AddrCheck only.
- The rest are shared.
+// We track a number of interesting events (using PROF_EVENT)
+// if MC_PROFILE_MEMORY is defined.

- 10 alloc_secondary_map
-
- 20 get_abit
-M 21 get_vbyte
- 22 set_abit
-M 23 set_vbyte
- 24 get_abits4_ALIGNED
-M 25 get_vbytes4_ALIGNED
-
- 30 set_address_range_perms
- 31 set_address_range_perms(lower byte loop)
- 32 set_address_range_perms(quadword loop)
- 33 set_address_range_perms(upper byte loop)
-
- 35 make_noaccess
- 36 make_writable
- 37 make_readable
-A 38 make_accessible
-
- 40 copy_address_range_state
- 41 copy_address_range_state(byte loop)
- 42 check_writable
- 43 check_writable(byte loop)
- 44 check_readable
- 45 check_readable(byte loop)
- 46 check_readable_asciiz
- 47 check_readable_asciiz(byte loop)
-A 48 check_accessible
-A 49 check_accessible(byte loop)
-
- 50 make_noaccess_aligned
- 51 make_writable_aligned
-
-M 60 helperc_LOADV4
-M 61 helperc_STOREV4
-M 62 helperc_LOADV2
-M 63 helperc_STOREV2
-M 64 helperc_LOADV1
-M 65 helperc_STOREV1
-
-A 66 helperc_ACCESS4
-A 67 helperc_ACCESS2
-A 68 helperc_ACCESS1
-
-M 70 rim_rd_V4_SLOWLY
-M 71 rim_wr_V4_SLOWLY
-M 72 rim_rd_V2_SLOWLY
-M 73 rim_wr_V2_SLOWLY
-M 74 rim_rd_V1_SLOWLY
-M 75 rim_wr_V1_SLOWLY
-
-A 76 ACCESS4_SLOWLY
-A 77 ACCESS2_SLOWLY
-A 78 ACCESS1_SLOWLY
-
- 80 fpu_read
- 81 fpu_read aligned 4
- 82 fpu_read aligned 8
- 83 fpu_read 2
- 84 fpu_read 10/28/108/512
-
-M 85 fpu_write
-M 86 fpu_write aligned 4
-M 87 fpu_write aligned 8
-M 88 fpu_write 2
-M 89 fpu_write 10/28/108/512
-
- 90 fpu_access
- 91 fpu_access aligned 4
- 92 fpu_access aligned 8
- 93 fpu_access 2
- 94 fpu_access 10/28/108/512
-
- 100 fpu_access_check_SLOWLY
- 101 fpu_access_check_SLOWLY(byte loop)
-
- 110 new_mem_stack_4
- 111 new_mem_stack_8
- 112 new_mem_stack_12
- 113 new_mem_stack_16
- 114 new_mem_stack_32
- 115 new_mem_stack
-
- 120 die_mem_stack_4
- 121 die_mem_stack_8
- 122 die_mem_stack_12
- 123 die_mem_stack_16
- 124 die_mem_stack_32
- 125 die_mem_stack
-*/
-
#ifdef MC_PROFILE_MEMORY

UInt MC_(event_ctr)[N_PROF_EVENTS];
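
[Illustrative aside, not part of the patch above.] The list being deleted was a stale index of PROF_EVENT numbers; the mechanism itself survives: when MC_PROFILE_MEMORY is defined, each PROF_EVENT(n, name) bumps a counter in MC_(event_ctr). A stand-alone sketch of that kind of counter machinery (simplified names, not the exact Valgrind definitions):

   #include <stdio.h>

   #define N_PROF_EVENTS 500

   static unsigned long event_ctr[N_PROF_EVENTS];
   static const char*   event_name[N_PROF_EVENTS];

   /* Record one occurrence of event 'n' and remember its name for the report. */
   #define PROF_EVENT(n, name) \
      do { event_ctr[n]++; event_name[n] = (name); } while (0)

   static void print_prof_events(void)
   {
      for (int i = 0; i < N_PROF_EVENTS; i++)
         if (event_ctr[i] > 0)
            printf("%4d  %-40s %10lu\n", i, event_name[i], event_ctr[i]);
   }

   int main(void)
   {
      PROF_EVENT(40, "make_noaccess");
      PROF_EVENT(40, "make_noaccess");
      PROF_EVENT(41, "make_writable");
      print_prof_events();   /* event 40 counted twice, event 41 once */
      return 0;
   }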
|
From: <sv...@va...> - 2005-12-26 19:34:01
|
Author: sewardj
Date: 2005-12-26 19:33:55 +0000 (Mon, 26 Dec 2005)
New Revision: 1513
Log:
Performance improvements for flag handling.
Modified:
trunk/priv/guest-amd64/ghelpers.c
trunk/priv/host-amd64/isel.c
Modified: trunk/priv/guest-amd64/ghelpers.c
===================================================================
--- trunk/priv/guest-amd64/ghelpers.c 2005-12-26 19:33:24 UTC (rev 1512)
+++ trunk/priv/guest-amd64/ghelpers.c 2005-12-26 19:33:55 UTC (rev 1513)
@@ -899,6 +899,18 @@
=20
/*---------------- SUBQ ----------------*/
=20
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondZ)) {
+ /* long long sub/cmp, then Z --> test dst=3D=3Dsrc */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpEQ64,cc_dep1,cc_dep2));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNZ)) {
+ /* long long sub/cmp, then NZ --> test dst!=3Dsrc */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpNE64,cc_dep1,cc_dep2));
+ }
+
if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondL)) {
/* long long sub/cmp, then L (signed less than)=20
--> test dst <s src */
@@ -913,16 +925,39 @@
binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
}
=20
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondNB)) {
+ /* long long sub/cmp, then NB (unsigned greater than or equal)
+ --> test src <=3Du dst */
+ /* Note, args are opposite way round from the usual */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
+ }
+
+ if (isU64(cc_op, AMD64G_CC_OP_SUBQ) && isU64(cond, AMD64CondBE)) {
+ /* long long sub/cmp, then BE (unsigned less than or equal)
+ --> test dst <=3Du src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
+ }
+
/*---------------- SUBL ----------------*/
=20
if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
/* long sub/cmp, then Z --> test dst=3D=3Dsrc */
return unop(Iop_1Uto64,
- binop(Iop_CmpEQ32,=20
- unop(Iop_64to32,cc_dep1),=20
- unop(Iop_64to32,cc_dep2)));
+ binop(Iop_CmpEQ64,=20
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32))));
}
=20
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
+ /* long sub/cmp, then NZ --> test dst!=3Dsrc */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpNE64,=20
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32))));
+ }
+
//.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondNZ)=
) {
//.. /* long sub/cmp, then NZ --> test dst!=3Dsrc */
//.. return unop(Iop_1Uto32,
@@ -936,7 +971,6 @@
binop(Iop_CmpLT64S,=20
binop(Iop_Shl64,cc_dep1,mkU8(32)),
binop(Iop_Shl64,cc_dep2,mkU8(32))));
-
}
=20
if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
@@ -949,14 +983,15 @@
=20
}
=20
+ if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondBE)) {
+ /* long sub/cmp, then BE (unsigned less than or equal)
+ --> test dst <=3Du src */
+ return unop(Iop_1Uto64,
+ binop(Iop_CmpLE64U,=20
+ binop(Iop_Shl64,cc_dep1,mkU8(32)),
+ binop(Iop_Shl64,cc_dep2,mkU8(32))));
+ }
=20
-//.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondBE)=
) {
-//.. /* long sub/cmp, then BE (unsigned less than or equal)
-//.. --> test dst <=3Du src */
-//.. return unop(Iop_1Uto32,
-//.. binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
-//.. }
-//..=20
//.. if (isU32(cc_op, AMD64G_CC_OP_SUBL) && isU32(cond, X86CondB))=
{
//.. /* long sub/cmp, then B (unsigned less than)
//.. --> test dst <u src */
@@ -1005,7 +1040,7 @@
=20
//.. if (isU32(cc_op, AMD64G_CC_OP_SUBB) && isU32(cond, X86CondNBE=
)) {
//.. /* long sub/cmp, then NBE (unsigned greater than)
-//.. --> test src <=3Du dst */
+//.. --> test src <u dst */
//.. /* Note, args are opposite way round from the usual */
//.. return unop(Iop_1Uto32,
//.. binop(Iop_CmpLT32U,=20
Modified: trunk/priv/host-amd64/isel.c
===================================================================
--- trunk/priv/host-amd64/isel.c 2005-12-26 19:33:24 UTC (rev 1512)
+++ trunk/priv/host-amd64/isel.c 2005-12-26 19:33:55 UTC (rev 1513)
@@ -2115,7 +2115,7 @@
|| e->Iex.Binop.op == Iop_CmpLT64S
|| e->Iex.Binop.op == Iop_CmpLT64U
|| e->Iex.Binop.op == Iop_CmpLE64S
- //|| e->Iex.Binop.op == Iop_CmpLE64U
+ || e->Iex.Binop.op == Iop_CmpLE64U
)) {
HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
@@ -2126,7 +2126,7 @@
case Iop_CmpLT64S: return Acc_L;
case Iop_CmpLT64U: return Acc_B;
case Iop_CmpLE64S: return Acc_LE;
- //case Iop_CmpLE64U: return Acc_BE;
+ case Iop_CmpLE64U: return Acc_BE;
default: vpanic("iselCondCode(amd64): CmpXX64");
}
}
|
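[Illustrative aside, not part of the patch above.] The new SUBL Z/NZ rules compare only the low 32 bits of the two 64-bit thunk operands by shifting both left by 32 before a 64-bit compare. A tiny stand-alone check of that identity (the helper name here is made up for the example):

   #include <assert.h>
   #include <stdint.h>

   /* (a << 32) == (b << 32) holds exactly when the low 32 bits of a and b
      are equal -- i.e. what a 32-bit sub/cmp's Z flag reports. */
   static int low32_equal_via_shift(uint64_t a, uint64_t b)
   {
      return (a << 32) == (b << 32);
   }

   int main(void)
   {
      assert( low32_equal_via_shift(0x1111222233334444ULL,
                                    0x9999888833334444ULL)); /* low halves equal  */
      assert(!low32_equal_via_shift(1ULL, 2ULL));            /* low halves differ */
      return 0;
   }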
|
From: <sv...@va...> - 2005-12-26 19:33:28
|
Author: sewardj
Date: 2005-12-26 19:33:24 +0000 (Mon, 26 Dec 2005)
New Revision: 1512
Log:
Comment-only fix
Modified:
trunk/priv/guest-x86/ghelpers.c
Modified: trunk/priv/guest-x86/ghelpers.c
===================================================================
--- trunk/priv/guest-x86/ghelpers.c 2005-12-24 13:14:11 UTC (rev 1511)
+++ trunk/priv/guest-x86/ghelpers.c 2005-12-26 19:33:24 UTC (rev 1512)
@@ -883,7 +883,7 @@
=20
if (isU32(cc_op, X86G_CC_OP_SUBB) && isU32(cond, X86CondNBE)) {
/* long sub/cmp, then NBE (unsigned greater than)
- --> test src <=3Du dst */
+ --> test src <u dst */
/* Note, args are opposite way round from the usual */
return unop(Iop_1Uto32,
binop(Iop_CmpLT32U,=20
|
|
From: <sv...@va...> - 2005-12-26 19:29:38
|
Author: njn
Date: 2005-12-26 19:29:33 +0000 (Mon, 26 Dec 2005)
New Revision: 5442
Log:
Remove unnecessary function pointers. And don't unnecessarily mark redzones
of new heap blocks as noaccess. Saves 30 lines of code and makes perf/heap
about 3% faster.
Modified:
branches/COMPVBITS/memcheck/mc_include.h
branches/COMPVBITS/memcheck/mc_main.c
branches/COMPVBITS/memcheck/mc_malloc_wrappers.c
Modified: branches/COMPVBITS/memcheck/mc_include.h
===================================================================
--- branches/COMPVBITS/memcheck/mc_include.h 2005-12-26 17:58:58 UTC (rev 5441)
+++ branches/COMPVBITS/memcheck/mc_include.h 2005-12-26 19:29:33 UTC (rev 5442)
@@ -95,15 +95,13 @@
/* For tracking memory pools. */
extern VgHashTable MC_(mempool_list);
=20
-/* Function pointers for the two tools to track interesting events. */
-extern void (*MC_(new_mem_heap)) ( Addr a, SizeT len, Bool is_inited );
-extern void (*MC_(ban_mem_heap)) ( Addr a, SizeT len );
-extern void (*MC_(die_mem_heap)) ( Addr a, SizeT len );
-extern void (*MC_(copy_mem_heap))( Addr from, Addr to, SizeT len );
+/* Shadow memory functions */
+extern Bool MC_(check_noaccess)( Addr a, SizeT len, Addr* bad_addr );
+extern void MC_(make_noaccess) ( Addr a, SizeT len );
+extern void MC_(make_writable) ( Addr a, SizeT len );
+extern void MC_(make_readable) ( Addr a, SizeT len );
+extern void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT le=
n );
=20
-/* Function pointers for internal sanity checking. */
-extern Bool (*MC_(check_noaccess))( Addr a, SizeT len, Addr* bad_addr );
-
extern void MC_(print_malloc_stats) ( void );
=20
extern void* MC_(malloc) ( ThreadId tid, SizeT n );
Modified: branches/COMPVBITS/memcheck/mc_main.c
===================================================================
--- branches/COMPVBITS/memcheck/mc_main.c 2005-12-26 17:58:58 UTC (rev 5441)
+++ branches/COMPVBITS/memcheck/mc_main.c 2005-12-26 19:29:33 UTC (rev 5442)
@@ -1133,24 +1133,24 @@
=20
/* --- Set permissions for arbitrary address ranges --- */
=20
-static void mc_make_noaccess ( Addr a, SizeT len )
+void MC_(make_noaccess) ( Addr a, SizeT len )
{
- PROF_EVENT(40, "mc_make_noaccess");
- DEBUG("mc_make_noaccess(%p, %lu)\n", a, len);
+ PROF_EVENT(40, "MC_(make_noaccess)");
+ DEBUG("MC_(make_noaccess)(%p, %lu)\n", a, len);
set_address_range_perms ( a, len, VA_BITS64_NOACCESS, SM_DIST_NOACCES=
S );
}
=20
-static void mc_make_writable ( Addr a, SizeT len )
+void MC_(make_writable) ( Addr a, SizeT len )
{
- PROF_EVENT(41, "mc_make_writable");
- DEBUG("mc_make_writable(%p, %lu)\n", a, len);
+ PROF_EVENT(41, "MC_(make_writable)");
+ DEBUG("MC_(make_writable)(%p, %lu)\n", a, len);
set_address_range_perms ( a, len, VA_BITS64_WRITABLE, SM_DIST_WRITABL=
E );
}
=20
-static void mc_make_readable ( Addr a, SizeT len )
+void MC_(make_readable) ( Addr a, SizeT len )
{
- PROF_EVENT(42, "mc_make_readable");
- DEBUG("mc_make_readable(%p, %lu)\n", a, len);
+ PROF_EVENT(42, "MC_(make_readable)");
+ DEBUG("MC_(make_readable)(%p, %lu)\n", a, len);
set_address_range_perms ( a, len, VA_BITS64_READABLE, SM_DIST_READABL=
E );
}
=20
@@ -1158,26 +1158,26 @@
/* --- Block-copy permissions (needed for implementing realloc() and
sys_mremap). --- */
=20
-static void mc_copy_address_range_state ( Addr src, Addr dst, SizeT len =
)
+void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
{
SizeT i, j;
=20
- DEBUG("mc_copy_address_range_state\n");
- PROF_EVENT(50, "mc_copy_address_range_state");
+ DEBUG("MC_(copy_address_range_state)\n");
+ PROF_EVENT(50, "MC_(copy_address_range_state)");
=20
if (len =3D=3D 0)
return;
=20
if (src < dst) {
for (i =3D 0, j =3D len-1; i < len; i++, j--) {
- PROF_EVENT(51, "mc_copy_address_range_state(loop)");
+ PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
set_vabits8( dst+j, get_vabits8( src+j ) );
}
}
=20
if (src > dst) {
for (i =3D 0; i < len; i++) {
- PROF_EVENT(51, "mc_copy_address_range_state(loop)");
+ PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
set_vabits8( dst+i, get_vabits8( src+i ) );
}
}
@@ -1195,13 +1195,13 @@
PROF_EVENT(300, "make_aligned_word32_writable");
=20
# if VG_DEBUG_MEMORY >=3D 2
- mc_make_writable(a, 4);
+ MC_(make_writable)(a, 4);
return;
# endif
=20
if (EXPECTED_NOT_TAKEN(a > MAX_PRIMARY_ADDRESS)) {
PROF_EVENT(301, "make_aligned_word32_writable-slow1");
- mc_make_writable(a, 4);
+ MC_(make_writable)(a, 4);
return;
}
=20
@@ -1220,13 +1220,13 @@
PROF_EVENT(310, "make_aligned_word32_noaccess");
=20
# if VG_DEBUG_MEMORY >=3D 2
- mc_make_noaccess(a, 4);
+ MC_(make_noaccess)(a, 4);
return;
# endif
=20
if (EXPECTED_NOT_TAKEN(a > MAX_PRIMARY_ADDRESS)) {
PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
- mc_make_noaccess(a, 4);
+ MC_(make_noaccess)(a, 4);
return;
}
=20
@@ -1246,13 +1246,13 @@
PROF_EVENT(320, "make_aligned_word64_writable");
=20
# if VG_DEBUG_MEMORY >=3D 2
- mc_make_writable(a, 8);
+ MC_(make_writable)(a, 8);
return;
# endif
=20
if (EXPECTED_NOT_TAKEN(a > MAX_PRIMARY_ADDRESS)) {
PROF_EVENT(321, "make_aligned_word64_writable-slow1");
- mc_make_writable(a, 8);
+ MC_(make_writable)(a, 8);
return;
}
=20
@@ -1271,13 +1271,13 @@
PROF_EVENT(330, "make_aligned_word64_noaccess");
=20
# if VG_DEBUG_MEMORY >=3D 2
- mc_make_noaccess(a, 8);
+ MC_(make_noaccess)(a, 8);
return;
# endif
=20
if (EXPECTED_NOT_TAKEN(a > MAX_PRIMARY_ADDRESS)) {
PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
- mc_make_noaccess(a, 8);
+ MC_(make_noaccess)(a, 8);
return;
}
=20
@@ -1297,7 +1297,7 @@
if (VG_IS_4_ALIGNED(new_SP)) {
make_aligned_word32_writable ( -VG_STACK_REDZONE_SZB + new_SP );
} else {
- mc_make_writable ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
+ MC_(make_writable) ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
}
}
=20
@@ -1307,7 +1307,7 @@
if (VG_IS_4_ALIGNED(new_SP)) {
make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 )=
;
} else {
- mc_make_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
+ MC_(make_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
}
}
=20
@@ -1320,7 +1320,7 @@
make_aligned_word32_writable ( -VG_STACK_REDZONE_SZB + new_SP )=
;
make_aligned_word32_writable ( -VG_STACK_REDZONE_SZB + new_SP+4 )=
;
} else {
- mc_make_writable ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
+ MC_(make_writable) ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
}
}
=20
@@ -1333,7 +1333,7 @@
make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 )=
;
make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 )=
;
} else {
- mc_make_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
+ MC_(make_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
}
}
=20
@@ -1347,7 +1347,7 @@
make_aligned_word32_writable ( -VG_STACK_REDZONE_SZB + new_SP )=
;
make_aligned_word64_writable ( -VG_STACK_REDZONE_SZB + new_SP+4 )=
;
} else {
- mc_make_writable ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
+ MC_(make_writable) ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
}
}
=20
@@ -1362,7 +1362,7 @@
make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 =
);
make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 =
);
} else {
- mc_make_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
+ MC_(make_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
}
}
=20
@@ -1377,7 +1377,7 @@
make_aligned_word64_writable ( -VG_STACK_REDZONE_SZB + new_SP+4 =
);
make_aligned_word32_writable ( -VG_STACK_REDZONE_SZB + new_SP+12 =
);
} else {
- mc_make_writable ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
+ MC_(make_writable) ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
}
}
=20
@@ -1392,7 +1392,7 @@
make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 =
);
make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 =
);
} else {
- mc_make_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
+ MC_(make_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
}
}
=20
@@ -1411,7 +1411,7 @@
make_aligned_word64_writable ( -VG_STACK_REDZONE_SZB + new_SP+20 =
);
make_aligned_word32_writable ( -VG_STACK_REDZONE_SZB + new_SP+28 =
);
} else {
- mc_make_writable ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
+ MC_(make_writable) ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
}
}
=20
@@ -1430,20 +1430,20 @@
make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 =
);
make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 =
);
} else {
- mc_make_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
+ MC_(make_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
}
}
=20
static void mc_new_mem_stack ( Addr a, SizeT len )
{
PROF_EVENT(115, "new_mem_stack");
- mc_make_writable ( -VG_STACK_REDZONE_SZB + a, len );
+ MC_(make_writable) ( -VG_STACK_REDZONE_SZB + a, len );
}
=20
static void mc_die_mem_stack ( Addr a, SizeT len )
{
PROF_EVENT(125, "die_mem_stack");
- mc_make_noaccess ( -VG_STACK_REDZONE_SZB + a, len );
+ MC_(make_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
}
=20
=20
@@ -1483,7 +1483,7 @@
=20
# if 0
/* Really slow version */
- mc_make_writable(base, len);
+ MC_(make_writable)(base, len);
# endif
=20
# if 0
@@ -1510,7 +1510,7 @@
make_aligned_word64_writable(base + 112);
make_aligned_word64_writable(base + 120);
} else {
- mc_make_writable(base, len);
+ MC_(make_writable)(base, len);
}
# endif=20
=20
@@ -1560,7 +1560,7 @@
}
=20
/* else fall into slow case */
- mc_make_writable(base, len);
+ MC_(make_writable)(base, len);
}
=20
=20
@@ -1585,7 +1585,7 @@
returns False, and if bad_addr is non-NULL, sets *bad_addr to
indicate the lowest failing address. Functions below are
similar. */
-static Bool mc_check_noaccess ( Addr a, SizeT len, Addr* bad_addr )
+Bool MC_(check_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
{
SizeT i;
UWord vabits8;
@@ -1768,29 +1768,19 @@
/* Ignore the permissions, just make it readable. Seems to work... *=
/
DEBUG("mc_new_mem_startup(%p, %llu, rr=3D%u, ww=3D%u, xx=3D%u)\n",
a,(ULong)len,rr,ww,xx);
- mc_make_readable(a, len);
+ MC_(make_readable)(a, len);
}
=20
static
-void mc_new_mem_heap ( Addr a, SizeT len, Bool is_inited )
-{
- if (is_inited) {
- mc_make_readable(a, len);
- } else {
- mc_make_writable(a, len);
- }
-}
-
-static
void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
{
- mc_make_readable(a, len);
+ MC_(make_readable)(a, len);
}
=20
static
void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
{
- mc_make_readable(a, len);
+ MC_(make_readable)(a, len);
}
=20
=20
@@ -3713,17 +3703,17 @@
break;
=20
case VG_USERREQ__MAKE_NOACCESS: /* make no access */
- mc_make_noaccess ( arg[1], arg[2] );
+ MC_(make_noaccess) ( arg[1], arg[2] );
*ret =3D -1;
break;
=20
case VG_USERREQ__MAKE_WRITABLE: /* make writable */
- mc_make_writable ( arg[1], arg[2] );
+ MC_(make_writable) ( arg[1], arg[2] );
*ret =3D -1;
break;
=20
case VG_USERREQ__MAKE_READABLE: /* make readable */
- mc_make_readable ( arg[1], arg[2] );
+ MC_(make_readable) ( arg[1], arg[2] );
*ret =3D -1;
break;
=20
@@ -4124,18 +4114,12 @@
MC_(realloc),
MC_MALLOC_REDZONE_SZB );
=20
- MC_( new_mem_heap) =3D mc_new_mem_heap;
- MC_( ban_mem_heap) =3D mc_make_noaccess;
- MC_(copy_mem_heap) =3D mc_copy_address_range_state;
- MC_( die_mem_heap) =3D mc_make_noaccess;
- MC_(check_noaccess) =3D mc_check_noaccess;
-
VG_(track_new_mem_startup) ( mc_new_mem_startup );
- VG_(track_new_mem_stack_signal)( mc_make_writable );
- VG_(track_new_mem_brk) ( mc_make_writable );
+ VG_(track_new_mem_stack_signal)( MC_(make_writable) );
+ VG_(track_new_mem_brk) ( MC_(make_writable) );
VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
=20
- VG_(track_copy_mem_remap) ( mc_copy_address_range_state );
+ VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
=20
// Nb: we don't do anything with mprotect. This means that V bits ar=
e
// preserved if a program, for example, marks some memory as inaccess=
ible
@@ -4148,9 +4132,9 @@
// distinct from V bits, then we could handle all this properly.
VG_(track_change_mem_mprotect) ( NULL );
=20
- VG_(track_die_mem_stack_signal)( mc_make_noaccess );=20
- VG_(track_die_mem_brk) ( mc_make_noaccess );
- VG_(track_die_mem_munmap) ( mc_make_noaccess );=20
+ VG_(track_die_mem_stack_signal)( MC_(make_noaccess) );=20
+ VG_(track_die_mem_brk) ( MC_(make_noaccess) );
+ VG_(track_die_mem_munmap) ( MC_(make_noaccess) );=20
=20
VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
@@ -4166,7 +4150,7 @@
VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
VG_(track_die_mem_stack) ( mc_die_mem_stack );
=20
- VG_(track_ban_mem_stack) ( mc_make_noaccess );
+ VG_(track_ban_mem_stack) ( MC_(make_noaccess) );
=20
VG_(track_pre_mem_read) ( mc_check_is_readable );
VG_(track_pre_mem_read_asciiz) ( mc_check_is_readable_asciiz );
Modified: branches/COMPVBITS/memcheck/mc_malloc_wrappers.c
===================================================================
--- branches/COMPVBITS/memcheck/mc_malloc_wrappers.c 2005-12-26 17:58:58 UTC (rev 5441)
+++ branches/COMPVBITS/memcheck/mc_malloc_wrappers.c 2005-12-26 19:29:33 UTC (rev 5442)
@@ -51,16 +51,7 @@
static SizeT cmalloc_n_frees =3D 0;
static SizeT cmalloc_bs_mallocd =3D 0;
=20
-/* Function pointers for tracking interesting events. */
-void (*MC_(new_mem_heap)) ( Addr a, SizeT len, Bool is_inited ) =3D NUL=
L;
-void (*MC_(ban_mem_heap)) ( Addr a, SizeT len ) =3D NUL=
L;
-void (*MC_(die_mem_heap)) ( Addr a, SizeT len ) =3D NUL=
L;
-void (*MC_(copy_mem_heap))( Addr from, Addr to, SizeT len ) =3D NUL=
L;
=20
-/* Function pointers for internal sanity checking. */
-Bool (*MC_(check_noaccess))( Addr a, SizeT len, Addr* bad_addr ) =3D NUL=
L;
-
-
/*------------------------------------------------------------*/
/*--- Tracking malloc'd and free'd blocks ---*/
/*------------------------------------------------------------*/
@@ -198,9 +189,10 @@
=20
VG_(HT_add_node)( table, create_MC_Chunk(tid, p, size, kind) );
=20
- MC_(ban_mem_heap)( p-rzB, rzB );
- MC_(new_mem_heap)( p, size, is_zeroed );
- MC_(ban_mem_heap)( p+size, rzB );
+ if (is_zeroed)
+ MC_(make_readable)( p, size );
+ else
+ MC_(make_writable)( p, size );
=20
return (void*)p;
}
@@ -263,11 +255,9 @@
static
void die_and_free_mem ( ThreadId tid, MC_Chunk* mc, SizeT rzB )
{
- /* Note: ban redzones again -- just in case user de-banned them
- with a client request... */
- MC_(ban_mem_heap)( mc->data-rzB, rzB );
- MC_(die_mem_heap)( mc->data, mc->size );
- MC_(ban_mem_heap)( mc->data+mc->size, rzB );
+ /* Note: make redzones noaccess again -- just in case user made them
+ accessible with a client request... */
+ MC_(make_noaccess)( mc->data-rzB, mc->size + 2*rzB );
=20
/* Put it out of harm's way for a while, if not from a client request=
*/
if (MC_AllocCustom !=3D mc->allockind) {
@@ -353,7 +343,7 @@
=20
} else if (old_size > new_size) {
/* new size is smaller */
- MC_(die_mem_heap)( mc->data+new_size, mc->size-new_size );
+ MC_(make_noaccess)( mc->data+new_size, mc->size-new_size );
mc->size =3D new_size;
mc->where =3D VG_(record_ExeContext)(tid);
p_new =3D p_old;
@@ -365,10 +355,10 @@
=20
if (a_new) {
/* First half kept and copied, second half new, red zones as no=
rmal */
- MC_(ban_mem_heap) ( a_new-MC_MALLOC_REDZONE_SZB, MC_MALLOC_REDZ=
ONE_SZB );
- MC_(copy_mem_heap)( (Addr)p_old, a_new, mc->size );
- MC_(new_mem_heap) ( a_new+mc->size, new_size-mc->size, /*init'd=
*/False );
- MC_(ban_mem_heap) ( a_new+new_size, MC_MALLOC_REDZONE_SZB );
+ MC_(make_noaccess)( a_new-MC_MALLOC_REDZONE_SZB, MC_MALLOC_REDZ=
ONE_SZB );
+ MC_(copy_address_range_state)( (Addr)p_old, a_new, mc->size );
+ MC_(make_writable)( a_new+mc->size, new_size-mc->size );
+ MC_(make_noaccess)( a_new+new_size, MC_MALLOC_REDZONE_SZB );
=20
/* Copy from old to new */
VG_(memcpy)((void*)a_new, p_old, mc->size);
@@ -434,11 +424,9 @@
// Clean up the chunks, one by one
VG_(HT_ResetIter)(mp->chunks);
while ( (mc =3D VG_(HT_Next)(mp->chunks)) ) {
- /* Note: ban redzones again -- just in case user de-banned them
- with a client request... */
- MC_(ban_mem_heap)(mc->data-mp->rzB, mp->rzB );
- MC_(die_mem_heap)(mc->data, mc->size );
- MC_(ban_mem_heap)(mc->data+mc->size, mp->rzB );
+ /* Note: make redzones noaccess again -- just in case user made th=
em
+ accessible with a client request... */
+ MC_(make_noaccess)(mc->data-mp->rzB, mc->size + 2*mp->rzB );
}
// Destroy the chunk table
VG_(HT_destruct)(mp->chunks);
|
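[Illustrative aside, not part of the patch above.] The die_and_free_mem() change collapses three shadow-memory calls (ban the lower redzone, kill the payload, ban the upper redzone) into a single MC_(make_noaccess): the block layout is [rzB redzone][size payload][rzB redzone], so one range starting at data-rzB and spanning size + 2*rzB bytes covers all three. A quick check of the arithmetic, treating addresses as plain integers:

   #include <assert.h>
   #include <stddef.h>

   typedef unsigned long Addr;

   int main(void)
   {
      Addr   data = 0x5000;   /* start of the user-visible payload */
      size_t size = 100;      /* payload size                      */
      size_t rzB  = 16;       /* redzone size on each side         */

      Addr lo = data - rzB;               /* single range: start        */
      Addr hi = lo + (size + 2*rzB);      /* single range: one past end */

      assert(lo          == data - rzB);          /* covers lower redzone  */
      assert(data + size == hi - rzB);            /* ... and the payload   */
      assert(hi          == data + size + rzB);   /* ... and upper redzone */
      return 0;
   }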
|
From: <sv...@va...> - 2005-12-26 18:30:08
|
Author: njn
Date: 2005-12-26 18:29:59 +0000 (Mon, 26 Dec 2005)
New Revision: 263
Log:
tweak formatting.
Modified:
trunk/docs/pubs.html
Modified: trunk/docs/pubs.html
===================================================================
--- trunk/docs/pubs.html 2005-12-26 18:24:41 UTC (rev 262)
+++ trunk/docs/pubs.html 2005-12-26 18:29:59 UTC (rev 263)
@@ -7,20 +7,20 @@
<ul>
=20
<li><p>
- <a href=3D"/docs/memcheck2005.pdf">Using Valgrind to detect undefined v=
alue
+ <b><a href=3D"/docs/memcheck2005.pdf">Using Valgrind to detect undefine=
d value
errors with bit-precision.</a><br>
Julian Seward and Nicholas Nethercote.<br>
Proceedings of the USENIX'05 Annual Technical Conference, Anaheim,
- California, USA, April 2005.<br>
+ California, USA, April 2005.</b><br>
This paper describes in detail how Memcheck's undefined value error
detection (a.k.a. V bits) works. Feel free to cite it if you are tal=
king
particularly about Memcheck's undefined value error detection.
</p></li>
=20
<li><p>
- <a href=3D"/docs/phd2004.pdf">Dynamic Binary Analysis and Instrumentati=
on.</a><br>
+ <b><a href=3D"/docs/phd2004.pdf">Dynamic Binary Analysis and Instrument=
ation.</a><br>
Nicholas Nethercote.<br>
- PhD Dissertation, University of Cambridge, November 2004.<br>
+ PhD Dissertation, University of Cambridge, November 2004.</b><br>
This dissertation describes Valgrind in some detail (some of these det=
ails
are now out-of-date) as well as Cachegrind, Annelid and Redux; it als=
o
covers some underlying theory about dynamic binary analysis in general=
and
@@ -31,27 +31,27 @@
</p></li>
=20
<li><p>
- <a href=3D"/docs/bounds-checking2004.ps.bz2">Bounds-Checking Entire Pro=
grams Without
+ <b><a href=3D"/docs/bounds-checking2004.ps.bz2">Bounds-Checking Entire =
Programs Without
Recompiling.</a><br>
Nicholas Nethercote and Jeremy Fitzhardinge.<br>
Informal Proceedings of the Second Workshop on Semantics, Program
Analysis, and Computing Environments for Memory Management (SPACE 20=
04),
- Venice, Italy, January 2004.<br>
+ Venice, Italy, January 2004.</b><br>
This paper describes Annelid, an experimental bounds checker.
</p></li>
=20
<li><p>
- <a href=3D"/docs/valgrind2003.ps.bz2">Valgrind: A Program Supervision F=
ramework.</a> (<a href=3D"/gallery/valgrind2003-talk.ps.bz2">slides</a>)<=
br>
+ <b><a href=3D"/docs/valgrind2003.ps.bz2">Valgrind: A Program Supervisio=
n Framework.</a> (<a href=3D"/gallery/valgrind2003-talk.ps.bz2">slides</a=
>)<br>
Nicholas Nethercote and Julian Seward.<br>
- Electronic Notes in Theoretical Computer Science 89 No. 2, 2003.
+ Electronic Notes in Theoretical Computer Science 89 No. 2, 2003.</b><b=
r>
This paper describes Valgrind in general, but is somewhat out-of-date.
Feel free to cite it when talking about Valgrind in general.
</p></li>
=20
<li><p>
- <a href=3D"/docs/redux2003.ps.bz2">Redux: A Dynamic Dataflow Tracer.</a=
><br>
+ <b><a href=3D"/docs/redux2003.ps.bz2">Redux: A Dynamic Dataflow Tracer.=
</a><br>
Nicholas Nethercote and Alan Mycroft.<br>
- Electronic Notes in Theoretical Computer Science 89 No. 2, 2003.
+ Electronic Notes in Theoretical Computer Science 89 No. 2, 2003.</b><b=
r>
This paper describes Redux, and experimental dynamic dataflow tracing
tool.
</p></li>
|
|
From: <sv...@va...> - 2005-12-26 18:24:45
|
Author: njn
Date: 2005-12-26 18:24:41 +0000 (Mon, 26 Dec 2005)
New Revision: 262
Log:
Added descriptions and citation guidelines to the publications.
Modified:
trunk/docs/pubs.html
Modified: trunk/docs/pubs.html
===================================================================
--- trunk/docs/pubs.html 2005-12-22 18:21:55 UTC (rev 261)
+++ trunk/docs/pubs.html 2005-12-26 18:24:41 UTC (rev 262)
@@ -1,7 +1,8 @@
<h1>Valgrind Publications</h1>
=20
<p>Here are some academic publications that have been written by Valgrin=
d
-developers.</p>
+developers. If you refer to Valgrind in a publication, please cite one =
or
+more of the following papers, not just the Valgrind website.</p>
=20
<ul>
=20
@@ -10,13 +11,23 @@
errors with bit-precision.</a><br>
Julian Seward and Nicholas Nethercote.<br>
Proceedings of the USENIX'05 Annual Technical Conference, Anaheim,
- California, USA, April 2005.
+ California, USA, April 2005.<br>
+ This paper describes in detail how Memcheck's undefined value error
+ detection (a.k.a. V bits) works. Feel free to cite it if you are tal=
king
+ particularly about Memcheck's undefined value error detection.
</p></li>
=20
<li><p>
<a href=3D"/docs/phd2004.pdf">Dynamic Binary Analysis and Instrumentati=
on.</a><br>
Nicholas Nethercote.<br>
- PhD Dissertation, University of Cambridge, November 2004.
+ PhD Dissertation, University of Cambridge, November 2004.<br>
+ This dissertation describes Valgrind in some detail (some of these det=
ails
+ are now out-of-date) as well as Cachegrind, Annelid and Redux; it als=
o
+ covers some underlying theory about dynamic binary analysis in general=
and
+ what all these tools have in common. Feel free to cite it if you are
+ talking about Valgrind in general (although the ENTCS Valgrind paper
+ should probably be cited in preference, even though it is older),
+ Cachegrind, or the dynamic binary analysis theory work.
</p></li>
=20
<li><p>
@@ -25,19 +36,24 @@
Nicholas Nethercote and Jeremy Fitzhardinge.<br>
Informal Proceedings of the Second Workshop on Semantics, Program
Analysis, and Computing Environments for Memory Management (SPACE 20=
04),
- Venice, Italy, January 2004.
+ Venice, Italy, January 2004.<br>
+ This paper describes Annelid, an experimental bounds checker.
</p></li>
=20
<li><p>
<a href=3D"/docs/valgrind2003.ps.bz2">Valgrind: A Program Supervision F=
ramework.</a> (<a href=3D"/gallery/valgrind2003-talk.ps.bz2">slides</a>)<=
br>
Nicholas Nethercote and Julian Seward.<br>
Electronic Notes in Theoretical Computer Science 89 No. 2, 2003.
+ This paper describes Valgrind in general, but is somewhat out-of-date.
+ Feel free to cite it when talking about Valgrind in general.
</p></li>
=20
<li><p>
<a href=3D"/docs/redux2003.ps.bz2">Redux: A Dynamic Dataflow Tracer.</a=
><br>
Nicholas Nethercote and Alan Mycroft.<br>
Electronic Notes in Theoretical Computer Science 89 No. 2, 2003.
+ This paper describes Redux, and experimental dynamic dataflow tracing
+ tool.
</p></li>
=20
</ul>
|
|
From: <sv...@va...> - 2005-12-26 17:59:03
|
Author: sewardj
Date: 2005-12-26 17:58:58 +0000 (Mon, 26 Dec 2005)
New Revision: 5441
Log:
More dispatcher tuning for ppc32/64. Makes a big difference for
perf/tinycc.
- run_thread_for_a_while: just clear this thread's reservation when
starting, not all of them.
- use a different fast-cache hashing function for ppc32/64 than for
x86/amd64. This allows the former to use all the fast-cache entries
rather than just 1/4 of them.
Modified:
trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S
trunk/coregrind/m_scheduler/scheduler.c
trunk/coregrind/m_transtab.c
trunk/coregrind/pub_core_transtab_asm.h
Modified: trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S
===================================================================
--- trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S 2005-12-26 17:50:22 UTC (rev 5440)
+++ trunk/coregrind/m_dispatch/dispatch-ppc32-linux.S 2005-12-26 17:58:58 UTC (rev 5441)
@@ -254,14 +254,14 @@
stw 3,OFFSET_ppc32_CIA(31)
=20
/* Are we out of timeslice? If yes, defer to scheduler. */
-// subic. 29,29,1
subi 29,29,1
cmplwi 29,0
beq counter_is_zero
=20
/* try a fast lookup in the translation cache */
- /* r4=3D((r3<<2) & (VG_TT_FAST_MASK<<2)) */
- rlwinm 4,3, 2, 32-2-VG_TT_FAST_BITS, 31-2 =20
+ /* r4 =3D VG_TT_FAST_HASH(addr) * sizeof(ULong)
+ =3D ((r3 >>u 2) & VG_TT_FAST_MASK) << 2 */
+ rlwinm 4,3, 0, 32-2-VG_TT_FAST_BITS, 31-2 =20
addis 5,4,VG_(tt_fast)@ha
lwz 5,VG_(tt_fast)@l(5)
lwz 6,4(5) /* big-endian, so comparing 2nd 32bit word */
@@ -310,12 +310,14 @@
stw 3,OFFSET_ppc32_CIA(31)
=20
/* Are we out of timeslice? If yes, defer to scheduler. */
- addic. 29,29,-1
+ subi 29,29,1
+ cmplwi 29,0
beq counter_is_zero
=20
/* try a fast lookup in the translation cache */
- /* r4=3D((r3<<2) & (VG_TT_FAST_MASK<<2)) */
- rlwinm 4,3, 2, 32-2-VG_TT_FAST_BITS, 31-2 =20
+ /* r4 =3D VG_TT_FAST_HASH(addr) * sizeof(ULong)
+ =3D ((r3 >>u 2) & VG_TT_FAST_MASK) << 2 */
+ rlwinm 4,3, 0, 32-2-VG_TT_FAST_BITS, 31-2=20
addis 5,4,VG_(tt_fast)@ha
lwz 5,VG_(tt_fast)@l(5)
lwz 6,4(5) /* big-endian, so comparing 2nd 32bit word */
Modified: trunk/coregrind/m_scheduler/scheduler.c
===================================================================
--- trunk/coregrind/m_scheduler/scheduler.c 2005-12-26 17:50:22 UTC (rev 5440)
+++ trunk/coregrind/m_scheduler/scheduler.c 2005-12-26 17:58:58 UTC (rev 5441)
@@ -331,8 +331,8 @@
VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
}
=20
-/* Use libc setjmp/longjmp. longjmp must not restore signal mask
- state, but does need to pass "val" through. */
+/* Use gcc's built-in setjmp/longjmp. longjmp must not restore signal
+ mask state, but does need to pass "val" through. */
#define SCHEDSETJMP(tid, jumped, stmt) \
do { \
ThreadState * volatile _qq_tst =3D VG_(get_ThreadState)(tid); \
@@ -343,7 +343,8 @@
_qq_tst->sched_jmpbuf_valid =3D True; \
stmt; \
} else if (VG_(clo_trace_sched)) \
- VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=3D%d\n", __LINE__, ti=
d, jumped); \
+ VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=3D%d\n", \
+ __LINE__, tid, jumped); =
\
vg_assert(_qq_tst->sched_jmpbuf_valid); \
_qq_tst->sched_jmpbuf_valid =3D False; \
} while(0)
@@ -370,7 +371,6 @@
=20
/* Paranoia */
vg_assert(VG_(is_valid_tid)(tid));
- vg_assert(VG_(is_valid_tid)(tid));
vg_assert(VG_(is_running_thread)(tid));
vg_assert(!VG_(is_exiting)(tid));
=20
@@ -408,11 +408,9 @@
=20
This should be abstractified and lifted out.
*/
- { Int i;
- /* Clear any existing reservation. Be paranoid and clear them all.=
*/
- for (i =3D 0; i < VG_N_THREADS; i++)
- VG_(threads)[i].arch.vex.guest_RESVN =3D 0;
- }
+ /* Clear any existing reservation that this thread might have made
+ last time it was running. */
+ VG_(threads)[tid].arch.vex.guest_RESVN =3D 0;
=20
/* ppc guest_state vector regs must be 16byte aligned for loads/store=
s */
vg_assert(VG_IS_16_ALIGNED(VG_(threads)[tid].arch.vex.guest_VR0));
@@ -422,7 +420,8 @@
/* there should be no undealt-with signals */
//vg_assert(VG_(threads)[tid].siginfo.si_signo =3D=3D 0);
=20
- //VG_(printf)("running EIP =3D %p ESP=3D%p\n", VG_(threads)[tid].arch=
.m_eip, VG_(threads)[tid].arch.m_esp);
+ //VG_(printf)("running EIP =3D %p ESP=3D%p\n",
+ //VG_(threads)[tid].arch.m_eip, VG_(threads)[tid].arch.m_esp);
=20
vg_assert(VG_(my_fault));
VG_(my_fault) =3D False;
Modified: trunk/coregrind/m_transtab.c
===================================================================
--- trunk/coregrind/m_transtab.c 2005-12-26 17:50:22 UTC (rev 5440)
+++ trunk/coregrind/m_transtab.c 2005-12-26 17:58:58 UTC (rev 5441)
@@ -606,7 +606,7 @@
=20
static void setFastCacheEntry ( Addr64 key, ULong* tce, UInt* count )
{
- UInt cno =3D ((UInt)key) & VG_TT_FAST_MASK;
+ UInt cno =3D (UInt)VG_TT_FAST_HASH(key);
VG_(tt_fast)[cno] =3D tce;
VG_(tt_fastN)[cno] =3D count;
n_fast_updates++;
Modified: trunk/coregrind/pub_core_transtab_asm.h
===================================================================
--- trunk/coregrind/pub_core_transtab_asm.h 2005-12-26 17:50:22 UTC (rev 5440)
+++ trunk/coregrind/pub_core_transtab_asm.h 2005-12-26 17:58:58 UTC (rev 5441)
@@ -31,11 +31,31 @@
#ifndef __PUB_CORE_TRANSTAB_ASM_H
#define __PUB_CORE_TRANSTAB_ASM_H
=20
-/* Constants for the fast translation lookup cache. */
+/* Constants for the fast translation lookup cache. It is a direct
+ mapped cache, with 2^VG_TT_FAST_BITS entries.
+
+ On x86/amd64, the cache index is computed as
+ 'address[VG_TT_FAST_BITS-1 : 0]'.
+
+ On ppc32/ppc64, the bottom two bits of instruction addresses are
+ zero, which means that function causes only 1/4 of the entries to
+ ever be used. So instead the function is '(address >>u
+ 2)[VG_TT_FAST_BITS-1 : 0]' on those targets. */
+
#define VG_TT_FAST_BITS 15
#define VG_TT_FAST_SIZE (1 << VG_TT_FAST_BITS)
#define VG_TT_FAST_MASK ((VG_TT_FAST_SIZE) - 1)
=20
+/* This macro isn't usable in asm land; nevertheless this seems
+ like a good place to put it. */
+#if defined(VGA_x86) || defined(VGA_amd64)
+# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) ) & VG_TT_FAST_MASK)
+#elif defined(VGA_ppc32) || defined(VGA_ppc64)
+# define VG_TT_FAST_HASH(_addr) ((((UWord)(_addr)) >> 2) & VG_TT_FAST_MASK)
+#else
+# error "VG_TT_FAST_HASH: unknown platform"
+#endif
+
#endif // __PUB_CORE_TRANSTAB_ASM_H
=20
/*--------------------------------------------------------------------*/
|
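[Illustrative aside, not part of the patch above.] Why the old low-bits hash wastes entries on ppc: guest instruction addresses there are 4-byte aligned, so 'addr & VG_TT_FAST_MASK' can only produce indices whose bottom two bits are zero (a quarter of the table), whereas '(addr >> 2) & VG_TT_FAST_MASK' spreads them across the whole table. A toy demonstration with an 8-entry table:

   #include <stdio.h>

   #define FAST_BITS 3                        /* toy table: 8 entries */
   #define FAST_MASK ((1u << FAST_BITS) - 1)

   static unsigned hash_low_bits(unsigned long a) { return  a       & FAST_MASK; }
   static unsigned hash_shifted (unsigned long a) { return (a >> 2) & FAST_MASK; }

   int main(void)
   {
      int used_low[8] = {0}, used_shift[8] = {0};

      /* A run of 4-byte-aligned "instruction addresses". */
      for (unsigned long a = 0x10000; a < 0x10000 + 64; a += 4) {
         used_low  [hash_low_bits(a)] = 1;
         used_shift[hash_shifted (a)] = 1;
      }

      int n_low = 0, n_shift = 0;
      for (int i = 0; i < 8; i++) { n_low += used_low[i]; n_shift += used_shift[i]; }

      printf("entries used: low-bits hash %d/8, shifted hash %d/8\n", n_low, n_shift);
      /* prints: entries used: low-bits hash 2/8, shifted hash 8/8 */
      return 0;
   }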
|
From: <sv...@va...> - 2005-12-26 17:50:28
|
Author: njn
Date: 2005-12-26 17:50:22 +0000 (Mon, 26 Dec 2005)
New Revision: 5440
Log:
code layout wibbles only
Modified:
trunk/coregrind/m_mallocfree.c
Modified: trunk/coregrind/m_mallocfree.c
===================================================================
--- trunk/coregrind/m_mallocfree.c 2005-12-26 03:54:49 UTC (rev 5439)
+++ trunk/coregrind/m_mallocfree.c 2005-12-26 17:50:22 UTC (rev 5440)
@@ -437,7 +437,7 @@
{
static Bool client_inited =3D False;
static Bool nonclient_inited =3D False;
- static SizeT client_redzone_szB =3D 8; // default: be paranoid
+ static SizeT client_rz_szB =3D 8; // default: be paranoid
=20
/* We use checked red zones (of various sizes) for our internal stuff=
,
and an unchecked zone of arbitrary size for the client. Of
@@ -458,23 +458,23 @@
// redzone size with VG_(needs_malloc_replacement)() after this=
module
// has done its first allocation from the client arena.
if (VG_(needs).malloc_replacement)
- vg_assert(client_redzone_szB =3D=3D VG_(tdict).tool_client_r=
edzone_szB);
+ vg_assert(client_rz_szB =3D=3D VG_(tdict).tool_client_redzon=
e_szB);
return;
}
=20
// Check and set the client arena redzone size
if (VG_(needs).malloc_replacement) {
- client_redzone_szB =3D VG_(tdict).tool_client_redzone_szB;
+ client_rz_szB =3D VG_(tdict).tool_client_redzone_szB;
// 128 is no special figure, just something not too big
- if (client_redzone_szB > 128) {
+ if (client_rz_szB > 128) {
VG_(printf)( "\nTool error:\n"
" specified redzone size is too big (%llu)\n",=
=20
- (ULong)client_redzone_szB);
+ (ULong)client_rz_szB);
VG_(exit)(1);
}
}
// Initialise the client arena
- arena_init ( VG_AR_CLIENT, "client", client_redzone_szB, 1048=
576 );
+ arena_init ( VG_AR_CLIENT, "client", client_rz_szB, 1048576 )=
;
client_inited =3D True;
=20
} else {
@@ -482,13 +482,13 @@
return;
}
// Initialise the non-client arenas
- arena_init ( VG_AR_CORE, "core", 4, CORE_ARENA_MIN_=
SZB );
- arena_init ( VG_AR_TOOL, "tool", 4, 1048=
576 );
- arena_init ( VG_AR_SYMTAB, "symtab", 4, 1048=
576 );
- arena_init ( VG_AR_DEMANGLE, "demangle", 4, 65=
536 );
- arena_init ( VG_AR_EXECTXT, "exectxt", 4, 262=
144 );
- arena_init ( VG_AR_ERRORS, "errors", 4, 65=
536 );
- arena_init ( VG_AR_TTAUX, "ttaux", 4, 65=
536 );
+ arena_init ( VG_AR_CORE, "core", 4, 1048576 )=
;
+ arena_init ( VG_AR_TOOL, "tool", 4, 1048576 )=
;
+ arena_init ( VG_AR_SYMTAB, "symtab", 4, 1048576 )=
;
+ arena_init ( VG_AR_DEMANGLE, "demangle", 4, 65536 )=
;
+ arena_init ( VG_AR_EXECTXT, "exectxt", 4, 262144 )=
;
+ arena_init ( VG_AR_ERRORS, "errors", 4, 65536 )=
;
+ arena_init ( VG_AR_TTAUX, "ttaux", 4, 65536 )=
;
nonclient_inited =3D True;
}
=20
|
|
From: <js...@ac...> - 2005-12-26 03:57:25
|
Nightly build on phoenix ( SuSE 10.0 ) started at 2005-12-26 03:30:01 GMT

Checking out vex source tree ... done
Building vex ... done
Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed

Regression test results follow

== 208 tests, 5 stderr failures, 1 stdout failure =================
memcheck/tests/leak-tree (stderr)
memcheck/tests/stack_switch (stderr)
memcheck/tests/x86/scalar (stderr)
none/tests/mremap2 (stdout)
none/tests/x86/faultstatus (stderr)
none/tests/x86/int (stderr)
|
From: <sv...@va...> - 2005-12-26 03:54:58
|
Author: sewardj
Date: 2005-12-26 03:54:49 +0000 (Mon, 26 Dec 2005)
New Revision: 5439
Log:
Attempt to make tinycc work on ppc32, by (1) getting rid of various x86
isms in tinycc.c, (2) giving it a fixed, preprocessed input file to chew on,
and (3) just compiling to a .o with no attempt to link.
Added:
trunk/perf/test_input_for_tinycc.c
Modified:
trunk/perf/Makefile.am
trunk/perf/tinycc.c
trunk/perf/tinycc.vgperf

[... diff too large to include ...]
|
From: Tom H. <to...@co...> - 2005-12-26 03:43:04
|
Nightly build on dunsmere ( athlon, Fedora Core 4 ) started at 2005-12-26 03:30:05 GMT

Results unchanged from 24 hours ago

Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed

Regression test results follow

== 210 tests, 7 stderr failures, 1 stdout failure =================
memcheck/tests/leak-tree (stderr)
memcheck/tests/mempool (stderr)
memcheck/tests/pointer-trace (stderr)
memcheck/tests/stack_switch (stderr)
memcheck/tests/x86/scalar (stderr)
none/tests/mremap2 (stdout)
none/tests/x86/faultstatus (stderr)
none/tests/x86/int (stderr)
|
From: <sv...@va...> - 2005-12-25 06:34:15
|
Author: njn
Date: 2005-12-25 06:34:04 +0000 (Sun, 25 Dec 2005)
New Revision: 5438
Log:
Merge in r5435 from COMPVBITS. Also added a note to
docs/internals/performance.txt about it.
Modified:
trunk/coregrind/m_execontext.c
trunk/coregrind/m_stacktrace.c
trunk/docs/internals/performance.txt
trunk/include/pub_tool_stacktrace.h
Modified: trunk/coregrind/m_execontext.c
===================================================================
--- trunk/coregrind/m_execontext.c 2005-12-25 06:30:34 UTC (rev 5437)
+++ trunk/coregrind/m_execontext.c 2005-12-25 06:34:04 UTC (rev 5438)
@@ -46,7 +46,8 @@
=20
struct _ExeContext {
struct _ExeContext * next;
- /* Variable-length array. The size is VG_(clo_backtrace_size); at
+ UInt n_ips;
+ /* Variable-length array. The size is 'n_ips'; at
least 1, at most VG_DEEPEST_BACKTRACE. [0] is the current IP,
[1] is its caller, [2] is the caller of [1], etc. */
Addr ips[0];
@@ -126,38 +127,42 @@
/* Print an ExeContext. */
void VG_(pp_ExeContext) ( ExeContext* ec )
{
- VG_(pp_StackTrace)( ec->ips, VG_(clo_backtrace_size) );
+ VG_(pp_StackTrace)( ec->ips, ec->n_ips );
}
=20
=20
/* Compare two ExeContexts, comparing all callers. */
Bool VG_(eq_ExeContext) ( VgRes res, ExeContext* e1, ExeContext* e2 )
{
+ Int i;
+
if (e1 =3D=3D NULL || e2 =3D=3D NULL)=20
return False;
+
+ // Must be at least one address in each trace.
+ tl_assert(e1->n_ips >=3D 1 && e2->n_ips >=3D 1);
+
switch (res) {
case Vg_LowRes:
/* Just compare the top two callers. */
ec_cmp2s++;
- if (e1->ips[0] !=3D e2->ips[0]) return False;
-
- if (VG_(clo_backtrace_size) < 2) return True;
- if (e1->ips[1] !=3D e2->ips[1]) return False;
+ for (i =3D 0; i < 2; i++) {
+ if ( (e1->n_ips <=3D i) && (e2->n_ips <=3D i)) return True;
+ if ( (e1->n_ips <=3D i) && !(e2->n_ips <=3D i)) return False;
+ if (!(e1->n_ips <=3D i) && (e2->n_ips <=3D i)) return False;
+ if (e1->ips[i] !=3D e2->ips[i]) return False;
+ }
return True;
=20
case Vg_MedRes:
/* Just compare the top four callers. */
ec_cmp4s++;
- if (e1->ips[0] !=3D e2->ips[0]) return False;
-
- if (VG_(clo_backtrace_size) < 2) return True;
- if (e1->ips[1] !=3D e2->ips[1]) return False;
-
- if (VG_(clo_backtrace_size) < 3) return True;
- if (e1->ips[2] !=3D e2->ips[2]) return False;
-
- if (VG_(clo_backtrace_size) < 4) return True;
- if (e1->ips[3] !=3D e2->ips[3]) return False;
+ for (i =3D 0; i < 4; i++) {
+ if ( (e1->n_ips <=3D i) && (e2->n_ips <=3D i)) return True;
+ if ( (e1->n_ips <=3D i) && !(e2->n_ips <=3D i)) return False;
+ if (!(e1->n_ips <=3D i) && (e2->n_ips <=3D i)) return False;
+ if (e1->ips[i] !=3D e2->ips[i]) return False;
+ }
return True;
=20
case Vg_HighRes:
@@ -188,18 +193,20 @@
UWord hash;
ExeContext* new_ec;
ExeContext* list;
+ UInt n_ips;
=20
init_ExeContext_storage();
- vg_assert(VG_(clo_backtrace_size) >=3D 1=20
- && VG_(clo_backtrace_size) <=3D VG_DEEPEST_BACKTRACE);
+ vg_assert(VG_(clo_backtrace_size) >=3D 1 &&
+ VG_(clo_backtrace_size) <=3D VG_DEEPEST_BACKTRACE);
=20
- VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+ n_ips =3D VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+ tl_assert(n_ips >=3D 1);
=20
/* Now figure out if we've seen this one before. First hash it so
as to determine the list number. */
=20
hash =3D 0;
- for (i =3D 0; i < VG_(clo_backtrace_size); i++) {
+ for (i =3D 0; i < n_ips; i++) {
hash ^=3D ips[i];
hash =3D (hash << 29) | (hash >> 3);
}
@@ -215,7 +222,7 @@
if (list =3D=3D NULL) break;
ec_searchcmps++;
same =3D True;
- for (i =3D 0; i < VG_(clo_backtrace_size); i++) {
+ for (i =3D 0; i < n_ips; i++) {
if (list->ips[i] !=3D ips[i]) {
same =3D False;
break;=20
@@ -234,13 +241,14 @@
ec_totstored++;
=20
new_ec =3D VG_(arena_malloc)( VG_AR_EXECTXT,=20
- sizeof(struct _ExeContext *)=20
- + VG_(clo_backtrace_size) * sizeof(Addr) =
);
+ sizeof(struct _ExeContext)=20
+ + n_ips * sizeof(Addr) );
=20
- for (i =3D 0; i < VG_(clo_backtrace_size); i++)
+ for (i =3D 0; i < n_ips; i++)
new_ec->ips[i] =3D ips[i];
=20
- new_ec->next =3D ec_list[hash];
+ new_ec->n_ips =3D n_ips;
+ new_ec->next =3D ec_list[hash];
ec_list[hash] =3D new_ec;
=20
return new_ec;
Modified: trunk/coregrind/m_stacktrace.c
===================================================================
--- trunk/coregrind/m_stacktrace.c 2005-12-25 06:30:34 UTC (rev 5437)
+++ trunk/coregrind/m_stacktrace.c 2005-12-25 06:34:04 UTC (rev 5438)
@@ -65,12 +65,9 @@
vg_assert(sizeof(Addr) =3D=3D sizeof(void*));
=20
/* Snaffle IPs from the client's stack into ips[0 .. n_ips-1],
- putting zeroes in when the trail goes cold, which we guess to be
+ stopping when the trail goes cold, which we guess to be
when FP is not a reasonable stack location. */
=20
- for (i =3D 0; i < n_ips; i++)
- ips[i] =3D 0;
-
// JRS 2002-sep-17: hack, to round up fp_max to the end of the
// current page, at least. Dunno if it helps.
// NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
Modified: trunk/docs/internals/performance.txt
===================================================================
--- trunk/docs/internals/performance.txt 2005-12-25 06:30:34 UTC (rev 5437)
+++ trunk/docs/internals/performance.txt 2005-12-25 06:34:04 UTC (rev 5438)
@@ -26,6 +26,9 @@
- Nick reduced the iteration count of the loop in swizzle() from 20 to 5,
which gave almost identical results while saving 2% in perf/tinycc and 10%
in perf/heap on a 3GHz Prescott P4.
+- Nick changed ExeContext gathering to not record/save extra zeroes at the
+ end. Saved 7% on perf/heap with --num-callers=50, and about 1% on
+ perf/tinycc.
=20
COMPVBITS branch:
- Nick converted to compress V bits, initial version saved 0--5% on most
Modified: trunk/include/pub_tool_stacktrace.h
===================================================================
--- trunk/include/pub_tool_stacktrace.h 2005-12-25 06:30:34 UTC (rev 5437)
+++ trunk/include/pub_tool_stacktrace.h 2005-12-25 06:34:04 UTC (rev 5438)
@@ -36,7 +36,8 @@
=20
// Walks the stack to get instruction pointers from the top stack frames=
for
// thread 'tid'. Maximum of 'n_ips' addresses put into 'ips'; 0 is the=
top
-// of the stack, 1 is its caller, etc.
+// of the stack, 1 is its caller, etc. Everything from ips[n_ips] onwar=
ds
+// is undefined and should not be read.
extern UInt VG_(get_StackTrace) ( ThreadId tid, StackTrace ips, UInt n_i=
ps );
=20
// Apply a function to every element in the StackTrace. The parameter '=
n'
|
|
From: <sv...@va...> - 2005-12-25 06:30:37
|
Author: njn
Date: 2005-12-25 06:30:34 +0000 (Sun, 25 Dec 2005)
New Revision: 5437
Log:
Update for recent change to OSet interface.
Modified:
trunk/memcheck/tests/oset_test.c
Modified: trunk/memcheck/tests/oset_test.c
===================================================================
--- trunk/memcheck/tests/oset_test.c 2005-12-25 06:27:51 UTC (rev 5436)
+++ trunk/memcheck/tests/oset_test.c 2005-12-25 06:30:34 UTC (rev 5437)
@@ -212,7 +212,7 @@
return buf;
}
=20
-static Int blockCmp(void* vkey, void* velem)
+static Word blockCmp(void* vkey, void* velem)
{
Addr key =3D *(Addr*)vkey;
Block* elem =3D (Block*)velem;
|
|
From: <sv...@va...> - 2005-12-25 06:27:54
|
Author: njn
Date: 2005-12-25 06:27:51 +0000 (Sun, 25 Dec 2005)
New Revision: 5436
Log:
add extra note about tinycc
Modified:
trunk/perf/README
Modified: trunk/perf/README
===================================================================
--- trunk/perf/README 2005-12-25 06:25:27 UTC (rev 5435)
+++ trunk/perf/README 2005-12-25 06:27:51 UTC (rev 5436)
@@ -58,6 +58,8 @@
Fabrice Bellard's TinyCC compiling itself multiple times.
- Strengths: A real program, lots of code (top 100 blocks only account for
47% of execution), involves large irregular data structures
- (presumably, since it's a compiler).
+ (presumably, since it's a compiler). Does lots of
+ malloc/free calls and so changes that make a big improvement
+ to perf/heap typically cause a small improvement.
- Weaknesses None, really, it's a good benchmark.
=20
|
|
From: <sv...@va...> - 2005-12-25 06:25:33
|
Author: njn
Date: 2005-12-25 06:25:27 +0000 (Sun, 25 Dec 2005)
New Revision: 5435
Log:
Made ExeContext gathering more efficient. Previously we were getting as
many code addresses as possible, and then filling the rest of the array (up
to VG_(clo_backtrace_size)) with zeroes. These zero entries were then
included in the hashing, and comparisons, and space was allocated for them
in saved ExeContexts.
By not putting in the zeroes, not doing any hashing/comparisons of them, and
not storing them, we see speed-ups for perf/heap of 5% with
--num-callers=12 and 7% for --num-callers=50, and about 1.5% for tinycc.
The amount of memory for ExeContexts also drops, saving around 500KB for
tinycc with --num-callers=50.
I also changed the allocation of each ExeContext to use "sizeof(struct
_ExeContext)" instead of "sizeof(struct _ExeContext *)". It worked ok
up until now because the struct only contained a single pointer in the
non-variable-sized part, but it was an accident waiting to happen when
struct _ExeContext changed (and indeed did happen to me when I added
'n_ips' to the struct).
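A generic illustration of the allocation pitfall described above (plain C, not the Valgrind code; the real struct uses the GNU 'Addr ips[0]' zero-length-array idiom rather than a C99 flexible array member):

   /* The fixed part must be sized with sizeof(struct S), never
      sizeof(struct S *), or the trailing array overruns the allocation as
      soon as the struct grows another field. */
   #include <stdlib.h>

   struct S {
      struct S*     next;
      unsigned      n;
      unsigned long ips[];   /* variable-length tail, like ExeContext's ips[] */
   };

   static struct S* mk(unsigned n)
   {
      struct S* p = malloc(sizeof(struct S) + n * sizeof(unsigned long));
      if (p != NULL)
         p->n = n;            /* safe: header space was actually allocated */
      return p;
   }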
Modified:
branches/COMPVBITS/coregrind/m_execontext.c
branches/COMPVBITS/coregrind/m_stacktrace.c
branches/COMPVBITS/include/pub_tool_stacktrace.h
Modified: branches/COMPVBITS/coregrind/m_execontext.c
===================================================================
--- branches/COMPVBITS/coregrind/m_execontext.c 2005-12-25 03:33:12 UTC (rev 5434)
+++ branches/COMPVBITS/coregrind/m_execontext.c 2005-12-25 06:25:27 UTC (rev 5435)
@@ -47,7 +47,8 @@

struct _ExeContext {
struct _ExeContext * next;
- /* Variable-length array. The size is VG_(clo_backtrace_size); at
+ UInt n_ips;
+ /* Variable-length array. The size is 'n_ips'; at
least 1, at most VG_DEEPEST_BACKTRACE. [0] is the current IP,
[1] is its caller, [2] is the caller of [1], etc. */
Addr ips[0];
@@ -127,38 +128,42 @@
/* Print an ExeContext. */
void VG_(pp_ExeContext) ( ExeContext* ec )
{
- VG_(pp_StackTrace)( ec->ips, VG_(clo_backtrace_size) );
+ VG_(pp_StackTrace)( ec->ips, ec->n_ips );
}


/* Compare two ExeContexts, comparing all callers. */
Bool VG_(eq_ExeContext) ( VgRes res, ExeContext* e1, ExeContext* e2 )
{
+ Int i;
+
if (e1 == NULL || e2 == NULL)
return False;
+
+ // Must be at least one address in each trace.
+ tl_assert(e1->n_ips >= 1 && e2->n_ips >= 1);
+
switch (res) {
case Vg_LowRes:
/* Just compare the top two callers. */
ec_cmp2s++;
- if (e1->ips[0] != e2->ips[0]) return False;
-
- if (VG_(clo_backtrace_size) < 2) return True;
- if (e1->ips[1] != e2->ips[1]) return False;
+ for (i = 0; i < 2; i++) {
+ if ( (e1->n_ips <= i) && (e2->n_ips <= i)) return True;
+ if ( (e1->n_ips <= i) && !(e2->n_ips <= i)) return False;
+ if (!(e1->n_ips <= i) && (e2->n_ips <= i)) return False;
+ if (e1->ips[i] != e2->ips[i]) return False;
+ }
return True;

case Vg_MedRes:
/* Just compare the top four callers. */
ec_cmp4s++;
- if (e1->ips[0] != e2->ips[0]) return False;
-
- if (VG_(clo_backtrace_size) < 2) return True;
- if (e1->ips[1] != e2->ips[1]) return False;
-
- if (VG_(clo_backtrace_size) < 3) return True;
- if (e1->ips[2] != e2->ips[2]) return False;
-
- if (VG_(clo_backtrace_size) < 4) return True;
- if (e1->ips[3] != e2->ips[3]) return False;
+ for (i = 0; i < 4; i++) {
+ if ( (e1->n_ips <= i) && (e2->n_ips <= i)) return True;
+ if ( (e1->n_ips <= i) && !(e2->n_ips <= i)) return False;
+ if (!(e1->n_ips <= i) && (e2->n_ips <= i)) return False;
+ if (e1->ips[i] != e2->ips[i]) return False;
+ }
return True;

case Vg_HighRes:
@@ -189,20 +194,22 @@
UWord hash;
ExeContext* new_ec;
ExeContext* list;
+ UInt n_ips;

VGP_PUSHCC(VgpExeContext);

init_ExeContext_storage();
- vg_assert(VG_(clo_backtrace_size) >= 1
- && VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE);
+ vg_assert(VG_(clo_backtrace_size) >= 1 &&
+ VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE);

- VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+ n_ips = VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size) );
+ tl_assert(n_ips >= 1);

/* Now figure out if we've seen this one before. First hash it so
as to determine the list number. */

hash = 0;
- for (i = 0; i < VG_(clo_backtrace_size); i++) {
+ for (i = 0; i < n_ips; i++) {
hash ^= ips[i];
hash = (hash << 29) | (hash >> 3);
}
@@ -218,7 +225,7 @@
if (list == NULL) break;
ec_searchcmps++;
same = True;
- for (i = 0; i < VG_(clo_backtrace_size); i++) {
+ for (i = 0; i < n_ips; i++) {
if (list->ips[i] != ips[i]) {
same = False;
break;
@@ -238,13 +245,14 @@
ec_totstored++;

new_ec = VG_(arena_malloc)( VG_AR_EXECTXT,
- sizeof(struct _ExeContext *)
- + VG_(clo_backtrace_size) * sizeof(Addr) );
+ sizeof(struct _ExeContext)
+ + n_ips * sizeof(Addr) );

- for (i = 0; i < VG_(clo_backtrace_size); i++)
+ for (i = 0; i < n_ips; i++)
new_ec->ips[i] = ips[i];

- new_ec->next = ec_list[hash];
+ new_ec->n_ips = n_ips;
+ new_ec->next = ec_list[hash];
ec_list[hash] = new_ec;

VGP_POPCC(VgpExeContext);
Modified: branches/COMPVBITS/coregrind/m_stacktrace.c
===================================================================
--- branches/COMPVBITS/coregrind/m_stacktrace.c 2005-12-25 03:33:12 UTC (rev 5434)
+++ branches/COMPVBITS/coregrind/m_stacktrace.c 2005-12-25 06:25:27 UTC (rev 5435)
@@ -68,12 +68,9 @@
vg_assert(sizeof(Addr) == sizeof(void*));

/* Snaffle IPs from the client's stack into ips[0 .. n_ips-1],
- putting zeroes in when the trail goes cold, which we guess to be
+ stopping when the trail goes cold, which we guess to be
when FP is not a reasonable stack location. */

- for (i = 0; i < n_ips; i++)
- ips[i] = 0;
-
// JRS 2002-sep-17: hack, to round up fp_max to the end of the
// current page, at least. Dunno if it helps.
// NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
Modified: branches/COMPVBITS/include/pub_tool_stacktrace.h
===================================================================
--- branches/COMPVBITS/include/pub_tool_stacktrace.h 2005-12-25 03:33:12 UTC (rev 5434)
+++ branches/COMPVBITS/include/pub_tool_stacktrace.h 2005-12-25 06:25:27 UTC (rev 5435)
@@ -36,7 +36,8 @@

// Walks the stack to get instruction pointers from the top stack frames for
// thread 'tid'. Maximum of 'n_ips' addresses put into 'ips'; 0 is the top
-// of the stack, 1 is its caller, etc.
+// of the stack, 1 is its caller, etc. Everything from ips[n_ips] onwards
+// is undefined and should not be read.
extern UInt VG_(get_StackTrace) ( ThreadId tid, StackTrace ips, UInt n_ips );

// Apply a function to every element in the StackTrace. The parameter 'n'
From: <js...@ac...> - 2005-12-25 03:55:04
Nightly build on phoenix ( SuSE 10.0 ) started at 2005-12-25 03:30:02 GMT

Checking out vex source tree ... done
Building vex ... done
Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed
Regression test results follow

== 208 tests, 5 stderr failures, 1 stdout failure
=================
memcheck/tests/leak-tree (stderr)
memcheck/tests/stack_switch (stderr)
memcheck/tests/x86/scalar (stderr)
none/tests/mremap2 (stdout)
none/tests/x86/faultstatus (stderr)
none/tests/x86/int (stderr)

=================================================
== Results from 24 hours ago ==
=================================================
Checking out vex source tree ... done
Building vex ... done
Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed
Regression test results follow

== 208 tests, 6 stderr failures, 1 stdout failure
=================
memcheck/tests/leak-tree (stderr)
memcheck/tests/mempool (stderr)
memcheck/tests/stack_switch (stderr)
memcheck/tests/x86/scalar (stderr)
none/tests/mremap2 (stdout)
none/tests/x86/faultstatus (stderr)
none/tests/x86/int (stderr)

=================================================
== Difference between 24 hours ago and now ==
=================================================
*** old.short Sun Dec 25 03:43:01 2005
--- new.short Sun Dec 25 03:55:20 2005
***************
*** 10,14 ****
! == 208 tests, 6 stderr failures, 1 stdout failure
=================
memcheck/tests/leak-tree (stderr)
- memcheck/tests/mempool (stderr)
memcheck/tests/stack_switch (stderr)
--- 10,13 ----
! == 208 tests, 5 stderr failures, 1 stdout failure
=================
memcheck/tests/leak-tree (stderr)
memcheck/tests/stack_switch (stderr)
From: Tom H. <to...@co...> - 2005-12-25 03:43:12
Nightly build on dunsmere ( athlon, Fedora Core 4 ) started at 2005-12-25 03:30:05 GMT

Results unchanged from 24 hours ago

Checking out valgrind source tree ... done
Configuring valgrind ... done
Building valgrind ... done
Running regression tests ... failed
Regression test results follow

== 210 tests, 7 stderr failures, 1 stdout failure
=================
memcheck/tests/leak-tree (stderr)
memcheck/tests/mempool (stderr)
memcheck/tests/pointer-trace (stderr)
memcheck/tests/stack_switch (stderr)
memcheck/tests/x86/scalar (stderr)
none/tests/mremap2 (stdout)
none/tests/x86/faultstatus (stderr)
none/tests/x86/int (stderr)
From: <sv...@va...> - 2005-12-25 03:33:22
Author: njn
Date: 2005-12-25 03:33:12 +0000 (Sun, 25 Dec 2005)
New Revision: 5434
Log:
update
Modified:
trunk/docs/internals/performance.txt
Modified: trunk/docs/internals/performance.txt
===================================================================
--- trunk/docs/internals/performance.txt 2005-12-25 02:53:02 UTC (rev 5433)
+++ trunk/docs/internals/performance.txt 2005-12-25 03:33:12 UTC (rev 5434)
@@ -23,6 +23,9 @@
- Julian changed findSb to slowly move superblocks to the front of the list
as they were accessed. This sped up perf/heap by 25--50%, and some big
programs (eg. ktuberling) programs by a couple of percent.
+- Nick reduced the iteration count of the loop in swizzle() from 20 to 5,
+ which gave almost identical results while saving 2% in perf/tinycc and 10%
+ in perf/heap on a 3GHz Prescott P4.

COMPVBITS branch:
- Nick converted to compress V bits, initial version saved 0--5% on most
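As a generic sketch of the "slowly move to the front" idea mentioned in the first bullet above -- not the actual findSb code, and the names are made up -- each successful lookup swaps the hit entry one slot towards the front, so frequently-used entries drift forward without the cost of a full move-to-front:

   static int find_and_promote(void** items, int n, void* wanted)
   {
      int i;
      for (i = 0; i < n; i++) {
         if (items[i] == wanted) {
            if (i > 0) {
               void* tmp  = items[i-1];   /* swap with the predecessor */
               items[i-1] = items[i];
               items[i]   = tmp;
               return i-1;                /* new position of 'wanted'  */
            }
            return 0;
         }
      }
      return -1;                          /* not present */
   }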
From: <sv...@va...> - 2005-12-25 02:53:08
Author: njn
Date: 2005-12-25 02:53:02 +0000 (Sun, 25 Dec 2005)
New Revision: 5433
Log:
This was meant to be merged in from the trunk as part of a previous
merging.
Modified:
branches/COMPVBITS/coregrind/m_main.c
Modified: branches/COMPVBITS/coregrind/m_main.c
===================================================================
--- branches/COMPVBITS/coregrind/m_main.c 2005-12-25 02:49:45 UTC (rev 5432)
+++ branches/COMPVBITS/coregrind/m_main.c 2005-12-25 02:53:02 UTC (rev 5433)
@@ -66,6 +66,7 @@

static void print_all_stats ( void )
{
+ VG_(print_translation_stats)();
VG_(print_tt_tc_stats)();
VG_(print_scheduler_stats)();
VG_(print_ExeContext_stats)();
From: <sv...@va...> - 2005-12-25 02:49:47
Author: njn
Date: 2005-12-25 02:49:45 +0000 (Sun, 25 Dec 2005)
New Revision: 5432
Log:
Merge r5431 (swizzle() speedup) from trunk.
Modified:
branches/COMPVBITS/coregrind/m_mallocfree.c
Modified: branches/COMPVBITS/coregrind/m_mallocfree.c
===================================================================
--- branches/COMPVBITS/coregrind/m_mallocfree.c 2005-12-25 02:47:12 UTC (rev 5431)
+++ branches/COMPVBITS/coregrind/m_mallocfree.c 2005-12-25 02:49:45 UTC (rev 5432)
@@ -696,7 +696,12 @@
if (p_best == NULL) return;

pn = pp = p_best;
- for (i = 0; i < 20; i++) {
+
+ // This loop bound was 20 for a long time, but experiments showed that
+ // reducing it to 10 gave the same result in all the tests, and 5 got the
+ // same result in 85--100% of cases. And it's called often enough to be
+ // noticeable in programs that allocated a lot.
+ for (i = 0; i < 5; i++) {
pn = get_next_b(pn);
pp = get_prev_b(pp);
if (pn < p_best) p_best = pn;
From: <sv...@va...> - 2005-12-25 02:47:19
Author: njn
Date: 2005-12-25 02:47:12 +0000 (Sun, 25 Dec 2005)
New Revision: 5431
Log:
A minor performance improvement -- make swizzle() faster.
Modified:
trunk/coregrind/m_mallocfree.c
Modified: trunk/coregrind/m_mallocfree.c
===================================================================
--- trunk/coregrind/m_mallocfree.c 2005-12-24 16:34:49 UTC (rev 5430)
+++ trunk/coregrind/m_mallocfree.c 2005-12-25 02:47:12 UTC (rev 5431)
@@ -704,7 +704,12 @@
if (p_best == NULL) return;

pn = pp = p_best;
- for (i = 0; i < 20; i++) {
+
+ // This loop bound was 20 for a long time, but experiments showed that
+ // reducing it to 10 gave the same result in all the tests, and 5 got the
+ // same result in 85--100% of cases. And it's called often enough to be
+ // noticeable in programs that allocated a lot.
+ for (i = 0; i < 5; i++) {
pn = get_next_b(pn);
pp = get_prev_b(pp);
if (pn < p_best) p_best = pn;
From: Nicholas N. <nj...@cs...> - 2005-12-24 22:29:02
Hi,
I noticed that this check in coregrind/m_stacks.c:VG_(unknown_SP_update)()
is being triggered a lot in programs that don't do any stack switching:
   /* Check if the stack pointer is still in the same stack as before. */
   if (current_stack == NULL ||
       new_SP < current_stack->start || new_SP > current_stack->end) {
      VG_(printf)("new_SP = %p, curr->start = %p, curr->end = %p\n",
                  new_SP, current_stack->start, current_stack->end);
      Stack* new_stack = find_stack_by_addr(new_SP);
      if (new_stack && new_stack->id != current_stack->id) {
         /* The stack pointer is now in another stack. Update the current
            stack information and return without doing anything else. */
         current_stack = new_stack;
         return;
      }
   }
The problem is that in m_main.c the main stack is registered at a minimal
size, on my machine it's 0xBEFFF000--0xBEFFFFFF. And then it extends
beyond that, so the above "out of range" case matches for values like
0xBEFFEFE4, 0xBEFFEF64, 0xBEFFEF1C, etc. But the calls to
find_stack_by_addr() fail -- because there are no other stacks -- and so
it doesn't get changed. And then the cycle repeats.
This only occurs on the non-common SP changes -- for the common ones (eg.
increment/decrement by 4) stack membership is not tested for.
Basically the problem is that registered stacks cannot be extended. Or
perhaps that the main stack is not set up with a big enough range.
Perhaps we should make it 8MB to begin with (or whatever ulimit says it
can be)?
Thoughts?
Nick
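The "8MB or whatever ulimit says" suggestion could look roughly like this as a generic C sketch (not Valgrind's internals; function and variable names here are made up, and the real core would have to use its own syscall wrappers rather than libc):

   /* Derive the main stack's registered size from the soft RLIMIT_STACK,
      falling back to 8MB when the limit is unlimited or unavailable. */
   #include <sys/resource.h>

   static unsigned long main_stack_reserve(void)
   {
      struct rlimit rl;
      unsigned long reserve = 8UL * 1024 * 1024;   /* 8MB fallback */
      if (getrlimit(RLIMIT_STACK, &rl) == 0 && rl.rlim_cur != RLIM_INFINITY)
         reserve = (unsigned long)rl.rlim_cur;
      return reserve;   /* register [stack_top - reserve, stack_top] */
   }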
From: John R.
Nicholas Nethercote wrote:
> In the recent Valgrind survey five people complained about the
> difficulty of tracking down the root cause of undefined value errors,
> caused by the fact that Memcheck waits until an undefined value can
> affect the visible behaviour of the program (eg. is used in a
> conditional branch, or a syscall input).
[snip]
> It has been suggested that an option be present to do this eager
> checking, but I'm not convinced it would be useful given the
> overwhelming number of false positives. I'm wondering what other people
> think.

Thank you, Nicholas, for continuing to explore eager undef checking.

One of the quality control policies that I deal with demands that an
application must never fetch uninitialized bits from memory. This policy
increases run-to-run repeatability when "unrelated" logic errors occur.
Such repeatability makes maintenance easier, and increases reliability over
the software lifecycle.

The policy also increases compliance with ISO C 1989, which says that any
use of uninitialized bits makes execution totally indeterminate. Language
lawyers argue whether "mere fetch" constitutes "use," but an addition
operation whose inputs contain uninit bits certainly is a use, even though
non-eager memcheck will not complain until the sum affects I/O or flow of
control.

The policy makes developers aware of alignment holes and padding in
structures. Often the response is "memset(&Struct, 0, sizeof(Struct));"
shortly after declaration. This can increase runtime efficiency,
particularly when the compiler "understands" memset(,0,) and thus elides
subsequent "Struct.member<k> = 0;", or when the hardware has special
instructions to clear entire cache lines. Of course, it can hurt on
processors such as i586 Intel PentiumPlain/MMX, where a write miss does not
allocate a cache line. But then memset can insert a fetch, for which i586
does allocate a new cache line upon miss. And memcheck can learn this
specific exception, just like memcheck can learn about the intentional
fetch overruns in strlen, strcpy, memcpy, etc.

If the fundamental low-level language runtime libraries fetch uninit bits
in their internal operation, then eager memcheck will notice, and the
resulting "noise" will be bothersome. As a contribution towards eliminating
this problem, from time to time I have "cleaned up" glibc so that its own
internal testcases fetch no uninit bits. See my web page
http://BitWagon.com/glibc-audit/glibc-audit.html

Just as the first encounter between an application and memcheck often
causes dismay, so the first encounter with eager memcheck is likely to be
even more daunting. But "thousands" of complaints can be handled with just
a few memset(), and this is heartening. Even junior team members can be
productive at this stage. Finding alignment holes or padding in an
application's "basic" structs can be a wakeup call for storage efficiency.
And if you keep track, within a week or two you might notice that the
number and frequency of non-reproducible behaviors is decreasing. Bugs
"cascade" less often; the original bug, the first misuse, tends to become
visible immediately rather than after infecting several other areas of
control or data.

--
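A tiny, generic illustration of the "memset shortly after declaration" practice described above (the struct and values are hypothetical, not from the thread):

   #include <string.h>

   struct Record {
      char flag;    /* typically followed by alignment padding bytes */
      int  value;
   };

   void example(void)
   {
      struct Record r;
      memset(&r, 0, sizeof(r));   /* padding bytes are now defined (zero) */
      r.flag  = 1;
      r.value = 42;
      /* copying r, hashing it, or writing it out no longer touches
         uninitialized padding bits. */
   }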
From: Nicholas N. <nj...@cs...> - 2005-12-24 18:15:43
Hi,

In the recent Valgrind survey five people complained about the difficulty
of tracking down the root cause of undefined value errors, caused by the
fact that Memcheck waits until an undefined value can affect the visible
behaviour of the program (eg. is used in a conditional branch, or a syscall
input). A couple of people suggested doing more eager checking, and this
idea has come up before.

The problem is that the copying of undefined values is common, mostly due
to the practice of padding structs for alignment and bitfields. I did some
experimentation with eager checking a couple of years ago and found that it
caused large numbers of false positives. I repeated the experiment again
yesterday and saw the same results.

I changed Memcheck to complain about the loading of any undefined values
and tried various programs. For the empty C program that just returns zero,
I get 24 errors from 23 contexts, most just from the dynamic linker. I get
the following counts for the following programs:

  empty         1 errors from 1 context
  perf/bz2      8405487 errors from 30 contexts
  perf/tinycc   4647525 errors from 301 contexts

I had to use --error-limit=no for these otherwise Memcheck would have
stopped reporting errors after 100,000. These programs have no
(unsuppressed) errors when run with a normal Memcheck.

If I suppress the ones in the dynamic linker, I get:

  empty         1 errors from 1 context
  perf/bz2      8405464 errors from 8 contexts
  perf/tinycc   4647501 errors from 299 contexts

If I change things so that any undefined value loaded gets loaded as if it
was defined (to avoid possible cascading errors), I get:

  empty         1 errors from 1 context
  perf/bz2      4202624 errors from 2 contexts
  perf/tinycc   1137041 errors from 113 contexts

I've attached the output from that last tinycc run.

Some extra programs:

  vim           521 errors from 120 contexts
  gcc           384 errors from 53 contexts
  emacs         4876 errors from 63 contexts

It has been suggested that an option be present to do this eager checking,
but I'm not convinced it would be useful given the overwhelming number of
false positives. I'm wondering what other people think.

If you want to try this out for yourself, I've attached the patch I used.
It's against the COMPVBITS branch, do this to check it out and build:

  svn co svn://www.valgrind.org/valgrind/branches/COMPVBITS
  cd COMPVBITS
  sh ./autogen.sh
  ./configure --prefix=<...>
  patch -p0 < eager.diff
  make

Nick
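A hypothetical example (not from the email) of why the copying of undefined values is so common: on a typical ABI, assigning one padded struct to another copies the alignment padding between 'c' and 'i', and those padding bytes were never initialized, so an eager checker flags a load the program never actually uses.

   struct Padded {
      char c;     /* padding bytes usually follow before 'i' */
      int  i;
   };

   int harmless_copy(void)
   {
      struct Padded a, b;
      a.c = 'x';
      a.i = 7;
      b = a;          /* loads a's undefined padding bytes -> eager report, */
      return b.i;     /* even though the padding is never otherwise used.   */
   }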
From: <sv...@va...> - 2005-12-24 16:34:58
Author: sewardj
Date: 2005-12-24 16:34:49 +0000 (Sat, 24 Dec 2005)
New Revision: 5430
Log:
Fix ppc32 build.
Modified:
branches/COMPVBITS/coregrind/m_machine.c
branches/COMPVBITS/coregrind/m_transtab.c
Modified: branches/COMPVBITS/coregrind/m_machine.c
===================================================================
--- branches/COMPVBITS/coregrind/m_machine.c 2005-12-24 15:33:32 UTC (rev 5429)
+++ branches/COMPVBITS/coregrind/m_machine.c 2005-12-24 16:34:49 UTC (rev 5430)
@@ -411,11 +411,11 @@
/* Either the value must not have been set yet (zero) or we can
tolerate it being set to the same value multiple times, as the
stack scanning logic in m_main is a bit stupid. */
- vg_assert(vai.ppc32_cache_line_szB == 0
- || vai.ppc32_cache_line_szB == szB);
+ vg_assert(vai.ppc_cache_line_szB == 0
+ || vai.ppc_cache_line_szB == szB);

vg_assert(szB == 32 || szB == 128);
- vai.ppc32_cache_line_szB = szB;
+ vai.ppc_cache_line_szB = szB;
}
#endif

Modified: branches/COMPVBITS/coregrind/m_transtab.c
===================================================================
--- branches/COMPVBITS/coregrind/m_transtab.c 2005-12-24 15:33:32 UTC (rev 5429)
+++ branches/COMPVBITS/coregrind/m_transtab.c 2005-12-24 16:34:49 UTC (rev 5430)
@@ -742,7 +742,7 @@
VexArchInfo vai;

VG_(machine_get_VexArchInfo)( NULL, &vai );
- cls = vai.ppc32_cache_line_szB;
+ cls = vai.ppc_cache_line_szB;

/* Stay sane .. */
vg_assert(cls == 32 || cls == 128);