|
From: <sv...@va...> - 2005-12-02 23:08:58
|
Author: njn
Date: 2005-12-02 23:08:54 +0000 (Fri, 02 Dec 2005)
New Revision: 5273
Log:
Rewrote set_address_range_perms(). Improves performance by 0--3% on typical
programs, and 22% on one unusual program (tsim_arch, which allocates very many
8KB objects on the stack).
Also, fixed the sec-maps stats debug output which was showing the wrong
sizes.
Modified:
branches/COMPVBITS/coregrind/pub_core_aspacemgr.h
branches/COMPVBITS/include/pub_tool_aspacemgr.h
branches/COMPVBITS/memcheck/mac_leakcheck.c
branches/COMPVBITS/memcheck/mac_shared.h
branches/COMPVBITS/memcheck/mc_main.c
Modified: branches/COMPVBITS/coregrind/pub_core_aspacemgr.h
===================================================================
--- branches/COMPVBITS/coregrind/pub_core_aspacemgr.h 2005-12-02 19:30:27 UTC (rev 5272)
+++ branches/COMPVBITS/coregrind/pub_core_aspacemgr.h 2005-12-02 23:08:54 UTC (rev 5273)
@@ -232,10 +232,6 @@
extern SysRes VG_(am_munmap_client)( /*OUT*/Bool* need_discard,
Addr start, SizeT length );
=20
-/* Unmap the given address range and update the segment array
- accordingly. This fails if the range isn't valid for valgrind. */
-extern SysRes VG_(am_munmap_valgrind)( Addr start, SizeT length );
-
/* Let (start,len) denote an area within a single Valgrind-owned
segment (anon or file). Change the ownership of [start, start+len)
to the client instead. Fails if (start,len) does not denote a
Modified: branches/COMPVBITS/include/pub_tool_aspacemgr.h
===================================================================
--- branches/COMPVBITS/include/pub_tool_aspacemgr.h 2005-12-02 19:30:27 UTC (rev 5272)
+++ branches/COMPVBITS/include/pub_tool_aspacemgr.h 2005-12-02 23:08:54 UTC (rev 5273)
@@ -147,6 +147,10 @@
/* Really just a wrapper around VG_(am_mmap_anon_float_valgrind). */
extern void* VG_(am_shadow_alloc)(SizeT size);
=20
+/* Unmap the given address range and update the segment array
+ accordingly. This fails if the range isn't valid for valgrind. */
+extern SysRes VG_(am_munmap_valgrind)( Addr start, SizeT length );
+
#endif // __PUB_TOOL_ASPACEMGR_H
=20
/*--------------------------------------------------------------------*/
Modified: branches/COMPVBITS/memcheck/mac_leakcheck.c
===================================================================
--- branches/COMPVBITS/memcheck/mac_leakcheck.c 2005-12-02 19:30:27 UTC (rev 5272)
+++ branches/COMPVBITS/memcheck/mac_leakcheck.c 2005-12-02 23:08:54 UTC (rev 5273)
@@ -446,7 +446,7 @@
=20
/* Skip invalid chunks */
if (!(*lc_is_within_valid_secondary)(ptr)) {
- ptr =3D VG_ROUNDUP(ptr+1, SECONDARY_SIZE);
+ ptr =3D VG_ROUNDUP(ptr+1, SM_SIZE);
continue;
}
=20
Modified: branches/COMPVBITS/memcheck/mac_shared.h
===================================================================
--- branches/COMPVBITS/memcheck/mac_shared.h 2005-12-02 19:30:27 UTC (rev 5272)
+++ branches/COMPVBITS/memcheck/mac_shared.h 2005-12-02 23:08:54 UTC (rev 5273)
@@ -204,8 +204,8 @@
/* The number of entries in the primary map can be altered. However
we hardwire the assumption that each secondary map covers precisely
64k of address space. */
-#define SECONDARY_SIZE 65536 /* DO NOT CHANGE */
-#define SECONDARY_MASK (SECONDARY_SIZE-1) /* DO NOT CHANGE */
+#define SM_SIZE 65536 /* DO NOT CHANGE */
+#define SM_MASK (SM_SIZE-1) /* DO NOT CHANGE */
=20
//zz #define SECONDARY_SHIFT 16
//zz #define SECONDARY_SIZE (1 << SECONDARY_SHIFT)
Modified: branches/COMPVBITS/memcheck/mc_main.c
===================================================================
--- branches/COMPVBITS/memcheck/mc_main.c 2005-12-02 19:30:27 UTC (rev 5272)
+++ branches/COMPVBITS/memcheck/mc_main.c 2005-12-02 23:08:54 UTC (rev 5273)
@@ -56,6 +56,7 @@
#include "mc_include.h"
#include "memcheck.h" /* for client requests */
=20
+// XXX: introduce PM_OFF macro
=20
#define EXPECTED_TAKEN(cond) __builtin_expect((cond),1)
#define EXPECTED_NOT_TAKEN(cond) __builtin_expect((cond),0)
@@ -129,6 +130,7 @@
/* --------------- Stats maps --------------- */
=20
static Int n_secmaps_issued =3D 0;
+static Int n_secmaps_deissued =3D 0;
static ULong n_auxmap_searches =3D 0;
static ULong n_auxmap_cmps =3D 0;
static Int n_sanity_cheap =3D 0;
@@ -182,15 +184,22 @@
#define MC_BITS32_WRITABLE 0x55 // 01_01_01_01b
#define MC_BITS32_READABLE 0xaa // 10_10_10_10b
=20
-#define MC_BITS64_NOACCESS 0x0000 // 00_00_00_00__00_00_00_00b
-#define MC_BITS64_WRITABLE 0x5555 // 01_01_01_01__01_01_01_01b
-#define MC_BITS64_READABLE 0xaaaa // 10_10_10_10__10_10_10_10b
+#define MC_BITS64_NOACCESS 0x0000 // 00_00_00_00b x 2
+#define MC_BITS64_WRITABLE 0x5555 // 01_01_01_01b x 2
+#define MC_BITS64_READABLE 0xaaaa // 10_10_10_10b x 2
=20
=20
#define SM_CHUNKS 16384
#define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
#define SM_OFF_64(aaa) (((aaa) & 0xffff) >> 3)
=20
+static inline Addr start_of_this_sm ( Addr a ) {
+ return (a & (~SM_MASK));
+}
+static inline Bool is_start_of_sm ( Addr a ) {
+ return (start_of_this_sm(a) =3D=3D a);
+}
+
typedef=20
struct {
UChar vabits32[SM_CHUNKS];
@@ -685,11 +694,12 @@
}
}
=20
-
-static void set_address_range_perms ( Addr a, SizeT len, UWord vabits64,
+static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits64=
,
UWord dsm_num )
{
- UWord sm_off64;
+ UWord vabits8, sm_off, sm_off64;
+ SizeT lenA, lenB, len_to_next_secmap;
+ Addr aNext;
SecMap* sm;
SecMap** binder;
SecMap* example_dsm;
@@ -701,27 +711,26 @@
vabits64 =3D=3D MC_BITS64_WRITABLE ||
vabits64 =3D=3D MC_BITS64_READABLE);
=20
- if (len =3D=3D 0)
+ if (lenT =3D=3D 0)
return;
=20
- if (len > 100 * 1000 * 1000) {
+ if (lenT > 100 * 1000 * 1000) {
if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
- Char* s =3D NULL; // placate GCC
+ Char* s =3D "unknown???";
if (vabits64 =3D=3D MC_BITS64_NOACCESS) s =3D "noaccess";
if (vabits64 =3D=3D MC_BITS64_WRITABLE) s =3D "writable";
if (vabits64 =3D=3D MC_BITS64_READABLE) s =3D "readable";
VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
- "large range %lu (%s)", len, s);
+ "large range %lu (%s)", lenT, s);
}
}
=20
# if VG_DEBUG_MEMORY >=3D 2
/*------------------ debug-only case ------------------ */
{
- // XXX: Simplest, slow version
UWord vabits8 =3D vabits64 & 0x3;
SizeT i;
- for (i =3D 0; i < len; i++) {
+ for (i =3D 0; i < lenT; i++) {
set_vabits8(aA + i, vabits8);
}
return;
@@ -734,77 +743,176 @@
to use (part of the space-compression scheme). */
example_dsm =3D &sm_distinguished[dsm_num];
=20
- /* Slowly do parts preceding 8-byte alignment. */
- while (len !=3D 0 && !VG_IS_8_ALIGNED(a)) {
- PROF_EVENT(151, "set_address_range_perms-loop1-pre");
- set_vabits8( a, vabits64 & 0x3 );
- a++;
- len--;
- } =20
+ vabits8 =3D vabits64 & 0x3;
+ =20
+ // We have to handle ranges covering various combinations of partial =
and
+ // whole sec-maps. Here is how parts 1, 2 and 3 are used in each cas=
e.
+ // Cases marked with a '*' are common.
+ //
+ // TYPE PARTS USED
+ // ---- ----------
+ // * one partial sec-map (p) 1
+ // - one whole sec-map (P) 2
+ //
+ // * two partial sec-maps (pp) 1,3=20
+ // - one partial, one whole sec-map (pP) 1,2
+ // - one whole, one partial sec-map (Pp) 2,3
+ // - two whole sec-maps (PP) 2,2
+ //
+ // * one partial, one whole, one partial (pPp) 1,2,3
+ // - one partial, two whole (pPP) 1,2,2
+ // - two whole, one partial (PPp) 2,2,3
+ // - three whole (PPP) 2,2,2
+ //
+ // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
+ // - one partial, N-1 whole (pP...PP) 1,2...2,2
+ // - N-1 whole, one partial (PP...Pp) 2,2...2,3
+ // - N whole (PP...PP) 2,2...2,3
=20
- if (len =3D=3D 0)
- return;
+ // Break up total length (lenT) into two parts: length in the first
+ // sec-map (lenA), and the rest (lenB); lenT =3D=3D lenA + lenB.
+ aNext =3D start_of_this_sm(a) + SM_SIZE;
+ len_to_next_secmap =3D aNext - a;
+ if ( lenT <=3D len_to_next_secmap ) {
+ // Range entirely within one sec-map. Covers almost all cases.
+ PROF_EVENT(151, "set_address_range_perms-single-secmap");
+ lenA =3D lenT;
+ lenB =3D 0;
+ } else if (is_start_of_sm(a)) {
+ // Range spans at least one whole sec-map, and starts at the begin=
ning
+ // of a sec-map; skip to Part 2.
+ PROF_EVENT(152, "set_address_range_perms-startof-secmap");
+ lenA =3D 0;
+ lenB =3D lenT;
+ goto part2;
+ } else {
+ // Range spans two or more sec-maps, first one is partial.
+ PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
+ lenA =3D len_to_next_secmap;
+ lenB =3D lenT - lenA;
+ }
=20
- tl_assert(VG_IS_8_ALIGNED(a) && len > 0);
+ //-------------------------------------------------------------------=
-----
+ // Part 1: Deal with the first sec_map. Most of the time the range w=
ill be
+ // entirely within a sec_map and this part alone will suffice. Also,
+ // doing it this way lets us avoid repeatedly testing for the crossin=
g of
+ // a sec-map boundary within these loops.
+ //-------------------------------------------------------------------=
-----
=20
- /* Now go in steps of 8 bytes. */
+ // If it's distinguished, make it undistinguished if necessary.
binder =3D find_secmap_binder_for_addr(a);
+ if (is_distinguished_sm(*binder)) {
+ if (*binder =3D=3D example_dsm) {
+ // Sec-map already has the V+A bits that we want, so skip.
+ PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
+ a =3D aNext;
+ lenA =3D 0;
+ } else {
+ PROF_EVENT(155, "set_address_range_perms-dist-sm1");
+ *binder =3D copy_for_writing(*binder);
+ }
+ }
+ sm =3D *binder;
=20
- while (len >=3D 8) {
- PROF_EVENT(152, "set_address_range_perms-loop8");
+ // 1 byte steps
+ while (True) {
+ if (VG_IS_8_ALIGNED(a)) break;
+ if (lenA < 1) break;
+ PROF_EVENT(156, "set_address_range_perms-loop1a");
+ sm_off =3D SM_OFF(a);
+ insert_vabit8_into_vabits32( a, vabits8, &(sm->vabits32[sm_off]) )=
;
+ a +=3D 1;
+ lenA -=3D 1;
+ }
+ // 8-aligned, 8 byte steps
+ while (True) {
+ if (lenA < 8) break;
+ PROF_EVENT(157, "set_address_range_perms-loop8a");
+ sm_off64 =3D SM_OFF_64(a);
+ ((UShort*)(sm->vabits32))[sm_off64] =3D vabits64;
+ a +=3D 8;
+ lenA -=3D 8;
+ }
+ // 1 byte steps
+ while (True) {
+ if (lenA < 1) break;
+ PROF_EVENT(158, "set_address_range_perms-loop1b");
+ sm_off =3D SM_OFF(a);
+ insert_vabit8_into_vabits32( a, vabits8, &(sm->vabits32[sm_off]) )=
;
+ a +=3D 1;
+ lenA -=3D 1;
+ }
=20
- if ((a & SECONDARY_MASK) =3D=3D 0) {
- /* we just traversed a primary map boundary, so update the bind=
er. */
- binder =3D find_secmap_binder_for_addr(a);
- PROF_EVENT(153, "set_address_range_perms-update-binder");
+ // We've finished the first sec-map. Is that it?
+ if (lenB =3D=3D 0)
+ return;
=20
- /* Space-optimisation. If we are setting the entire
- secondary map, just point this entry at one of our
- distinguished secondaries. However, only do that if it
- already points at a distinguished secondary, since doing
- otherwise would leak the existing secondary. We could do
- better and free up any pre-existing non-distinguished
- secondary at this point, since we are guaranteed that each
- non-dist secondary only has one pointer to it, and we have
- that pointer right here. */
- if (len >=3D SECONDARY_SIZE && is_distinguished_sm(*binder)) {
- PROF_EVENT(154, "set_address_range_perms-entire-secmap");
- *binder =3D example_dsm;
- len -=3D SECONDARY_SIZE;
- a +=3D SECONDARY_SIZE;
- continue;
- }
+ //-------------------------------------------------------------------=
-----
+ // Part 2: Fast-set entire sec-maps at a time.
+ //-------------------------------------------------------------------=
-----
+ part2:
+ // 64KB-aligned, 64KB steps.
+ // Nb: we can reach here with lenB < SM_SIZE
+ while (True) {
+ if (lenB < SM_SIZE) break;
+ tl_assert(is_start_of_sm(a));
+ PROF_EVENT(159, "set_address_range_perms-loop64K");
+ binder =3D find_secmap_binder_for_addr(a);
+ if (!is_distinguished_sm(*binder)) {
+ PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm")=
;
+ // Free the non-distinguished sec-map that we're replacing. Th=
is
+ // case happens moderately often, enough to be worthwhile.
+ VG_(am_munmap_valgrind)((Addr)*binder, sizeof(SecMap));
+ n_secmaps_deissued++; // Needed for the expensive sanity c=
heck
}
-
- /* If the primary is already pointing to a distinguished map
- with the same properties as we're trying to set, then leave
- it that way. Otherwise we have to do some writing. */
- if (*binder !=3D example_dsm) {
- /* Make sure it's OK to write the secondary. */
- if (is_distinguished_sm(*binder)) {
- *binder =3D copy_for_writing(*binder);
- }
- sm =3D *binder;
- sm_off64 =3D SM_OFF_64(a);
- ((UShort*)(sm->vabits32))[sm_off64] =3D vabits64;
- }
-
- a +=3D 8;
- len -=3D 8;
+ // Make the sec-map entry point to the example DSM
+ *binder =3D example_dsm;
+ lenB -=3D SM_SIZE;
+ a +=3D SM_SIZE;
}
=20
- if (len =3D=3D 0)
+ // We've finished the whole sec-maps. Is that it?
+ if (lenB =3D=3D 0)
return;
=20
- tl_assert(VG_IS_8_ALIGNED(a) && len > 0 && len < 8);
+ //-------------------------------------------------------------------=
-----
+ // Part 3: Finish off the final partial sec-map, if necessary.
+ //-------------------------------------------------------------------=
-----
=20
- /* Finish the upper fragment. */
- while (len > 0) {
- PROF_EVENT(155, "set_address_range_perms-loop1-post");
- set_vabits8 ( a, vabits64 & 0x3 );
- a++;
- len--;
- } =20
+ tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
+
+ // If it's distinguished, make it undistinguished if necessary.
+ binder =3D find_secmap_binder_for_addr(a);
+ if (is_distinguished_sm(*binder)) {
+ if (*binder =3D=3D example_dsm) {
+ // Sec-map already has the V+A bits that we want, so stop.
+ PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
+ return;
+ } else {
+ PROF_EVENT(162, "set_address_range_perms-dist-sm2");
+ *binder =3D copy_for_writing(*binder);
+ }
+ }
+ sm =3D *binder;
+
+ // 8-aligned, 8 byte steps
+ while (True) {
+ if (lenB < 8) break;
+ PROF_EVENT(163, "set_address_range_perms-loop8b");
+ sm_off64 =3D SM_OFF_64(a);
+ ((UShort*)(sm->vabits32))[sm_off64] =3D vabits64;
+ a +=3D 8;
+ lenB -=3D 8;
+ }
+ // 1 byte steps
+ while (True) {
+ if (lenB < 1) return;
+ PROF_EVENT(164, "set_address_range_perms-loop1c");
+ sm_off =3D SM_OFF(a);
+ insert_vabit8_into_vabits32( a, vabits8, &(sm->vabits32[sm_off]) )=
;
+ a +=3D 1;
+ lenB -=3D 1;
+ }
}
=20
=20
@@ -2410,7 +2518,7 @@
}
}
=20
- if (n_secmaps_found !=3D n_secmaps_issued)
+ if (n_secmaps_found !=3D (n_secmaps_issued - n_secmaps_deissued))
bad =3D True;
=20
if (bad) {
@@ -2736,13 +2844,11 @@
" memcheck: auxmaps: %lld searches, %lld comparisons",
n_auxmap_searches, n_auxmap_cmps ); =20
VG_(message)(Vg_DebugMsg,
- " memcheck: secondaries: %d issued (%dk, %dM)",
+ " memcheck: secondaries: %d issued (%dk, %dM), %d deissued",
n_secmaps_issued,=20
- n_secmaps_issued * 64,
- n_secmaps_issued / 16 ); =20
- VG_(message)(Vg_DebugMsg,
- " memcheck: sec V bit entries: %d",
- VG_(OSet_Size)(secVBitTable) );
+ n_secmaps_issued * sizeof(SecMap) / 1024,
+ n_secmaps_issued * sizeof(SecMap) / (1024 * 1024),
+ n_secmaps_deissued); =20
=20
n_accessible_dist =3D 0;
for (i =3D 0; i < N_PRIMARY_MAP; i++) {
@@ -2761,9 +2867,12 @@
VG_(message)(Vg_DebugMsg,
" memcheck: secondaries: %d accessible and distinguished (%dk, =
%dM)",
n_accessible_dist,=20
- n_accessible_dist * 64,
- n_accessible_dist / 16 ); =20
+ n_accessible_dist * sizeof(SecMap) / 1024,
+ n_accessible_dist * sizeof(SecMap) / (1024 * 1024) );
=20
+ VG_(message)(Vg_DebugMsg,
+ " memcheck: sec V bit entries: %d",
+ VG_(OSet_Size)(secVBitTable) );
}
=20
if (0) {
|