|
From: <sv...@va...> - 2005-04-21 17:34:08
|
Author: sewardj
Date: 2005-04-21 18:34:00 +0100 (Thu, 21 Apr 2005)
New Revision: 3538
Modified:
trunk/memcheck/mac_needs.c
trunk/memcheck/mac_shared.h
trunk/memcheck/mc_main.c
Log:
* Crank up the memcheck event-counting system, and enhance it to
name the events, rather than just number them, which makes it a
lot easier to use
* Based on that, fill in some fast-path cases for
{LOAD,STORE}V{4,2,1}. The assembly code looks about the same
length as it did before, on x86. Fast-path cases for the
stack have yet to be done.
Modified: trunk/memcheck/mac_needs.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/memcheck/mac_needs.c 2005-04-21 02:37:54 UTC (rev 3537)
+++ trunk/memcheck/mac_needs.c 2005-04-21 17:34:00 UTC (rev 3538)
@@ -797,25 +797,35 @@
=20
#ifdef MAC_PROFILE_MEMORY
=20
-UInt MAC_(event_ctr)[N_PROF_EVENTS];
+UInt MAC_(event_ctr)[N_PROF_EVENTS];
+HChar* MAC_(event_ctr_name)[N_PROF_EVENTS];
=20
static void init_prof_mem ( void )
{
Int i;
- for (i =3D 0; i < N_PROF_EVENTS; i++)
+ for (i =3D 0; i < N_PROF_EVENTS; i++) {
MAC_(event_ctr)[i] =3D 0;
+ MAC_(event_ctr_name)[i] =3D NULL;
+ }
}
=20
static void done_prof_mem ( void )
{
- Int i;
+ Int i;
+ Bool spaced =3D False;
for (i =3D 0; i < N_PROF_EVENTS; i++) {
- if ((i % 10) =3D=3D 0)=20
+ if (!spaced && (i % 10) =3D=3D 0) {
VG_(printf)("\n");
- if (MAC_(event_ctr)[i] > 0)
- VG_(printf)( "prof mem event %2d: %d\n", i, MAC_(event_ctr)[i] =
);
+ spaced =3D True;
+ }
+ if (MAC_(event_ctr)[i] > 0) {
+ spaced =3D False;
+ VG_(printf)( "prof mem event %3d: %9d %s\n",=20
+ i, MAC_(event_ctr)[i],
+ MAC_(event_ctr_name)[i]=20
+ ? MAC_(event_ctr_name)[i] : "unnamed");
+ }
}
- VG_(printf)("\n");
}
=20
#else
Modified: trunk/memcheck/mac_shared.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/memcheck/mac_shared.h 2005-04-21 02:37:54 UTC (rev 3537)
+++ trunk/memcheck/mac_shared.h 2005-04-21 17:34:00 UTC (rev 3538)
@@ -171,21 +171,27 @@
VgpToolCC;
=20
/* Define to collect detailed performance info. */
-/* #define MAC_PROFILE_MEMORY */
+#define MAC_PROFILE_MEMORY
=20
#ifdef MAC_PROFILE_MEMORY
-# define N_PROF_EVENTS 150
+# define N_PROF_EVENTS 500
=20
-extern UInt MAC_(event_ctr)[N_PROF_EVENTS];
+extern UInt MAC_(event_ctr)[N_PROF_EVENTS];
+extern HChar* MAC_(event_ctr_name)[N_PROF_EVENTS];
=20
-# define PROF_EVENT(ev) \
- do { tl_assert((ev) >=3D 0 && (ev) < N_PROF_EVENTS); \
- MAC_(event_ctr)[ev]++; \
+# define PROF_EVENT(ev, name) \
+ do { tl_assert((ev) >=3D 0 && (ev) < N_PROF_EVENTS); \
+ /* crude and inaccurate check to ensure the same */ \
+ /* event isn't being used with > 1 name */ \
+ if (MAC_(event_ctr_name)[ev]) \
+ tl_assert(name =3D=3D MAC_(event_ctr_name)[ev]); \
+ MAC_(event_ctr)[ev]++; \
+ MAC_(event_ctr_name)[ev] =3D (name); \
} while (False);
=20
#else
=20
-# define PROF_EVENT(ev) /* */
+# define PROF_EVENT(ev, name) /* */
=20
#endif /* MAC_PROFILE_MEMORY */
=20
@@ -437,7 +443,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_4)(Addr new_SP) \
{ \
- PROF_EVENT(110); \
+ PROF_EVENT(110, "new_mem_stack_4"); \
if (VG_IS_4_ALIGNED(new_SP)) { \
ALIGNED4_NEW ( new_SP ); \
} else { \
@@ -447,7 +453,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_4)(Addr new_SP) \
{ \
- PROF_EVENT(120); \
+ PROF_EVENT(120, "die_mem_stack_4"); \
if (VG_IS_4_ALIGNED(new_SP)) { \
ALIGNED4_DIE ( new_SP-4 ); \
} else { \
@@ -457,7 +463,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_8)(Addr new_SP) \
{ \
- PROF_EVENT(111); \
+ PROF_EVENT(111, "new_mem_stack_8"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_NEW ( new_SP ); \
} else if (VG_IS_4_ALIGNED(new_SP)) { \
@@ -470,7 +476,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_8)(Addr new_SP) \
{ \
- PROF_EVENT(121); \
+ PROF_EVENT(121, "die_mem_stack_8"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_DIE ( new_SP-8 ); \
} else if (VG_IS_4_ALIGNED(new_SP)) { \
@@ -483,7 +489,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_12)(Addr new_SP) \
{ \
- PROF_EVENT(112); \
+ PROF_EVENT(112, "new_mem_stack_12"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_NEW ( new_SP ); \
ALIGNED4_NEW ( new_SP+8 ); \
@@ -497,7 +503,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_12)(Addr new_SP) \
{ \
- PROF_EVENT(122); \
+ PROF_EVENT(122, "die_mem_stack_12"); \
/* Note the -12 in the test */ \
if (VG_IS_8_ALIGNED(new_SP-12)) { \
ALIGNED8_DIE ( new_SP-12 ); \
@@ -512,7 +518,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_16)(Addr new_SP) \
{ \
- PROF_EVENT(113); \
+ PROF_EVENT(113, "new_mem_stack_16"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_NEW ( new_SP ); \
ALIGNED8_NEW ( new_SP+8 ); \
@@ -527,7 +533,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_16)(Addr new_SP) \
{ \
- PROF_EVENT(123); \
+ PROF_EVENT(123, "die_mem_stack_16"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_DIE ( new_SP-16 ); \
ALIGNED8_DIE ( new_SP-8 ); \
@@ -542,7 +548,7 @@
\
void VGA_REGPARM(1) MAC_(new_mem_stack_32)(Addr new_SP) \
{ \
- PROF_EVENT(114); \
+ PROF_EVENT(114, "new_mem_stack_32"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_NEW ( new_SP ); \
ALIGNED8_NEW ( new_SP+8 ); \
@@ -561,7 +567,7 @@
\
void VGA_REGPARM(1) MAC_(die_mem_stack_32)(Addr new_SP) \
{ \
- PROF_EVENT(124); \
+ PROF_EVENT(124, "die_mem_stack_32"); \
if (VG_IS_8_ALIGNED(new_SP)) { \
ALIGNED8_DIE ( new_SP-32 ); \
ALIGNED8_DIE ( new_SP-24 ); \
@@ -580,13 +586,13 @@
\
void MAC_(new_mem_stack) ( Addr a, SizeT len ) \
{ \
- PROF_EVENT(115); \
+ PROF_EVENT(115, "new_mem_stack"); \
UNALIGNED_NEW ( a, len ); \
} \
\
void MAC_(die_mem_stack) ( Addr a, SizeT len ) \
{ \
- PROF_EVENT(125); \
+ PROF_EVENT(125, "die_mem_stack"); \
UNALIGNED_DIE ( a, len ); \
}
=20
Modified: trunk/memcheck/mc_main.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/memcheck/mc_main.c 2005-04-21 02:37:54 UTC (rev 3537)
+++ trunk/memcheck/mc_main.c 2005-04-21 17:34:00 UTC (rev 3538)
@@ -52,6 +52,17 @@
//#include "vg_profile.c"
=20
=20
+#define EXPECTED_TAKEN(cond) __builtin_expect((cond),1)
+#define EXPECTED_NOT_TAKEN(cond) __builtin_expect((cond),0)
+
+/* Define to debug the mem audit system. Set to:
+ 0 no debugging, fast cases are used
+ 1 some sanity checking, fast cases are used
+ 2 max sanity checking, only slow cases are used
+*/
+#define VG_DEBUG_MEMORY 1
+
+
typedef enum {
MC_Ok =3D 5, MC_AddrErr =3D 6, MC_ValueErr =3D 7
} MC_ReadResult;
@@ -69,12 +80,16 @@
we hardwire the assumption that each secondary map covers precisely
64k of address space. */
=20
+/* Only change this. N_PRIMARY_MAPS *must* be a power of 2. */
#define N_PRIMARY_BITS 16
-#define N_PRIMARY_MAPS ((1 << N_PRIMARY_BITS)-1)
=20
-#define MAX_PRIMARY_ADDRESS (Addr)(((Addr)65536) * N_PRIMARY_MAPS)
+/* Do not change this. */
+#define N_PRIMARY_MAPS (1 << N_PRIMARY_BITS)
=20
+/* Do not change this. */
+#define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAPS)-1)
=20
+
/* --------------- Secondary maps --------------- */
=20
typedef=20
@@ -304,10 +319,11 @@
Bool aok;
UWord abit, vbyte;
=20
- PROF_EVENT(70);
+ PROF_EVENT(30, "mc_LOADVn_slow");
tl_assert(szB =3D=3D 8 || szB =3D=3D 4 || szB =3D=3D 2 || szB =3D=3D =
1);
=20
while (True) {
+ PROF_EVENT(31, "mc_LOADVn_slow(loop)");
ai =3D a+byte_offset_w(szB,bigendian,i);
get_abit_and_vbyte(&abit, &vbyte, ai);
aok =3D abit =3D=3D VGM_BIT_VALID;
@@ -337,13 +353,14 @@
Bool aok;
Addr ai;
=20
- PROF_EVENT(71);
+ PROF_EVENT(35, "mc_STOREVn_slow");
tl_assert(szB =3D=3D 8 || szB =3D=3D 4 || szB =3D=3D 2 || szB =3D=3D =
1);
=20
/* Dump vbytes in memory, iterating from least to most significant
byte. At the same time establish addressibility of the
location. */
for (i =3D 0; i < szB; i++) {
+ PROF_EVENT(36, "mc_STOREVn_slow(loop)");
ai =3D a+byte_offset_w(szB,bigendian,i);
abit =3D get_abit(ai);
aok =3D abit =3D=3D VGM_BIT_VALID;
@@ -376,8 +393,6 @@
=20
//zz #if 0 /* this is the old implementation */
//zz=20
-//zz /* Define to debug the mem audit system. */
-//zz /* #define VG_DEBUG_MEMORY */
//zz=20
//zz=20
//zz /*------------------------------------------------------------*/
@@ -523,8 +538,6 @@
=20
/* auxmap_size =3D auxmap_used =3D 0;=20
no ... these are statically initialised */
-
- tl_assert( TL_(expensive_sanity_check)() );
}
=20
=20
@@ -828,21 +841,21 @@
=20
static void mc_make_noaccess ( Addr a, SizeT len )
{
- PROF_EVENT(35);
+ PROF_EVENT(40, "mc_make_noaccess");
DEBUG("mc_make_noaccess(%p, %llu)\n", a, (ULong)len);
set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID );
}
=20
static void mc_make_writable ( Addr a, SizeT len )
{
- PROF_EVENT(36);
+ PROF_EVENT(41, "mc_make_writable");
DEBUG("mc_make_writable(%p, %llu)\n", a, (ULong)len);
set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID );
}
=20
static void mc_make_readable ( Addr a, SizeT len )
{
- PROF_EVENT(37);
+ PROF_EVENT(42, "mc_make_readable");
DEBUG("mc_make_readable(%p, %llu)\n", a, (ULong)len);
set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID );
}
@@ -850,6 +863,7 @@
static __inline__
void make_aligned_word32_writable(Addr a)
{
+ PROF_EVENT(43, "make_aligned_word32_writable");
mc_make_writable(a, 4);
//zz SecMap* sm;
//zz UInt sm_off;
@@ -870,6 +884,7 @@
static __inline__
void make_aligned_word32_noaccess(Addr a)
{
+ PROF_EVENT(44, "make_aligned_word32_noaccess");
mc_make_noaccess(a, 4);
//zz SecMap* sm;
//zz UInt sm_off;
@@ -891,6 +906,7 @@
static __inline__
void make_aligned_word64_writable(Addr a)
{
+ PROF_EVENT(45, "make_aligned_word64_writable");
mc_make_writable(a, 8);
//zz SecMap* sm;
//zz UInt sm_off;
@@ -908,6 +924,7 @@
static __inline__
void make_aligned_word64_noaccess(Addr a)
{
+ PROF_EVENT(46, "make_aligned_word64_noaccess");
mc_make_noaccess(a, 8);
//zz SecMap* sm;
//zz UInt sm_off;
@@ -939,9 +956,9 @@
=20
DEBUG("mc_copy_address_range_state\n");
=20
- PROF_EVENT(40);
+ PROF_EVENT(50, "mc_copy_address_range_state");
for (i =3D 0; i < len; i++) {
- PROF_EVENT(41);
+ PROF_EVENT(51, "mc_copy_address_range_state(loop)");
get_abit_and_vbyte( &abit, &vbyte, src+i );
set_abit_and_vbyte( dst+i, abit, vbyte );
}
@@ -964,9 +981,9 @@
{
SizeT i;
UWord abit;
- PROF_EVENT(42);
+ PROF_EVENT(60, "mc_check_noaccess");
for (i =3D 0; i < len; i++) {
- PROF_EVENT(43);
+ PROF_EVENT(61, "mc_check_noaccess(loop)");
abit =3D get_abit(a);
if (abit =3D=3D VGM_BIT_VALID) {
if (bad_addr !=3D NULL)=20
@@ -982,9 +999,9 @@
{
SizeT i;
UWord abit;
- PROF_EVENT(42);
+ PROF_EVENT(62, "mc_check_writable");
for (i =3D 0; i < len; i++) {
- PROF_EVENT(43);
+ PROF_EVENT(63, "mc_check_writable(loop)");
abit =3D get_abit(a);
if (abit =3D=3D VGM_BIT_INVALID) {
if (bad_addr !=3D NULL) *bad_addr =3D a;
@@ -1001,10 +1018,10 @@
UWord abit;
UWord vbyte;
=20
- PROF_EVENT(44);
+ PROF_EVENT(64, "mc_check_readable");
DEBUG("mc_check_readable\n");
for (i =3D 0; i < len; i++) {
- PROF_EVENT(45);
+ PROF_EVENT(65, "mc_check_readable(loop)");
get_abit_and_vbyte(&abit, &vbyte, a);
// Report addressability errors in preference to definedness error=
s
// by checking the A bits first.
@@ -1032,10 +1049,10 @@
{
UWord abit;
UWord vbyte;
- PROF_EVENT(46);
+ PROF_EVENT(66, "mc_check_readable_asciiz");
DEBUG("mc_check_readable_asciiz\n");
while (True) {
- PROF_EVENT(47);
+ PROF_EVENT(67, "mc_check_readable_asciiz(loop)");
get_abit_and_vbyte(&abit, &vbyte, a);
// As in mc_check_readable(), check A bits first
if (abit !=3D VGM_BIT_VALID) {
@@ -1276,6 +1293,7 @@
VGA_REGPARM(1)
ULong MC_(helperc_LOADV8) ( Addr a )
{
+ PROF_EVENT(70, "helperc_LOADV8");
return mc_LOADVn_slow( a, 8, False/*littleendian*/ );
//zz # ifdef VG_DEBUG_MEMORY
//zz return mc_rd_V8_SLOWLY(a);
@@ -1311,6 +1329,7 @@
VGA_REGPARM(1)
void MC_(helperc_STOREV8) ( Addr a, ULong vbytes )
{
+ PROF_EVENT(71, "helperc_STOREV8");
mc_STOREVn_slow( a, 8, vbytes, False/*littleendian*/ );
//zz # ifdef VG_DEBUG_MEMORY
//zz mc_wr_V8_SLOWLY(a, vbytes);
@@ -1349,86 +1368,152 @@
/* ------------------------ Size =3D 4 ------------------------ */
=20
VGA_REGPARM(1)
-UWord MC_(helperc_LOADV4) ( Addr a )
+UWord MC_(helperc_LOADV4) ( Addr aA )
{
- return (UWord)mc_LOADVn_slow( a, 4, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz return mc_rd_V4_SLOWLY(a);
-//zz # else
-//zz UInt sec_no =3D rotateRight16(a) & 0x3FFFF;
-//zz SecMap* sm =3D primary_map[sec_no];
-//zz UInt a_off =3D (SM_OFF(a)) >> 3;
-//zz UChar abits =3D sm->abits[a_off];
-//zz abits >>=3D (a & 4);
-//zz abits &=3D 15;
-//zz PROF_EVENT(60);
-//zz if (abits =3D=3D VGM_NIBBLE_VALID) {
-//zz /* Handle common case quickly: a is suitably aligned, is mapp=
ed,
-//zz and is addressible. */
-//zz UInt v_off =3D SM_OFF(a);
-//zz return ((UInt*)(sm->vbyte))[ v_off >> 2 ];
-//zz } else {
-//zz /* Slow but general case. */
-//zz return mc_rd_V4_SLOWLY(a);
-//zz }
-//zz # endif
+ PROF_EVENT(220, "helperc_LOADV4");
+
+# if VG_DEBUG_MEMORY >=3D 2
+ return (UWord)mc_LOADVn_slow( aA, 4, False/*littleendian*/ );
+# else
+
+ const UWord mask =3D ~((0x10000-4) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a =3D (UWord)aA;
+
+ /* If any part of 'a' indicated by the mask is 1, either 'a' is not
+ naturally aligned, or 'a' exceeds the range covered by the
+ primary map. Either way we defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(221, "helperc_LOADV4-slow1");
+ return (UWord)mc_LOADVn_slow( aA, 4, False/*littleendian*/ );
+ }
+
+ UWord sec_no =3D (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >=3D 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm =3D primary_map[sec_no];
+ UWord v_off =3D a & 0xFFFF;
+ UWord a_off =3D v_off >> 3;
+ UWord abits =3D (UWord)(sm->abits[a_off]);
+ abits >>=3D (a & 4);
+ abits &=3D 15;
+ if (EXPECTED_TAKEN(abits =3D=3D VGM_NIBBLE_VALID)) {
+ /* Handle common case quickly: a is suitably aligned, is mapped,
+ and is addressible. */
+ return (UWord)(
+ 0xFFFFFFFFULL
+ & ((UInt*)(sm->vbyte))[ v_off >> 2 ]
+ );
+ } else {
+ /* Slow but general case. */
+ PROF_EVENT(222, "helperc_LOADV4-slow2");
+ return (UWord)mc_LOADVn_slow( a, 4, False/*littleendian*/ );
+ }
+
+# endif
}
=20
+
VGA_REGPARM(2)
-void MC_(helperc_STOREV4) ( Addr a, UWord vbytes )
+void MC_(helperc_STOREV4) ( Addr aA, UWord vbytes )
{
+ PROF_EVENT(230, "helperc_STOREV4");
+
+# if VG_DEBUG_MEMORY >=3D 2
mc_STOREVn_slow( a, 4, (ULong)vbytes, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz mc_wr_V4_SLOWLY(a, vbytes);
-//zz # else
-//zz UInt sec_no =3D rotateRight16(a) & 0x3FFFF;
-//zz SecMap* sm =3D primary_map[sec_no];
-//zz UInt a_off =3D (SM_OFF(a)) >> 3;
-//zz UChar abits =3D sm->abits[a_off];
-//zz abits >>=3D (a & 4);
-//zz abits &=3D 15;
-//zz PROF_EVENT(61);
-//zz if (!IS_DISTINGUISHED_SM(sm) && abits =3D=3D VGM_NIBBLE_VALID) {
-//zz /* Handle common case quickly: a is suitably aligned, is mapp=
ed,
-//zz and is addressible. */
-//zz UInt v_off =3D SM_OFF(a);
-//zz ((UInt*)(sm->vbyte))[ v_off >> 2 ] =3D vbytes;
-//zz } else {
-//zz /* Slow but general case. */
-//zz mc_wr_V4_SLOWLY(a, vbytes);
-//zz }
-//zz # endif
+# else
+
+ const UWord mask =3D ~((0x10000-4) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a =3D (UWord)aA;
+
+ /* If any part of 'a' indicated by the mask is 1, either 'a' is not
+ naturally aligned, or 'a' exceeds the range covered by the
+ primary map. Either way we defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(231, "helperc_STOREV4-slow1");
+ mc_STOREVn_slow( aA, 4, (ULong)vbytes, False/*littleendian*/ );
+ return;
+ }
+
+ UWord sec_no =3D (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >=3D 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm =3D primary_map[sec_no];
+ UWord v_off =3D a & 0xFFFF;
+ UWord a_off =3D v_off >> 3;
+ UWord abits =3D (UWord)(sm->abits[a_off]);
+ abits >>=3D (a & 4);
+ abits &=3D 15;
+ if (EXPECTED_TAKEN(!is_distinguished_sm(sm)=20
+ && abits =3D=3D VGM_NIBBLE_VALID)) {
+ /* Handle common case quickly: a is suitably aligned, is mapped,
+ and is addressible. */
+ ((UInt*)(sm->vbyte))[ v_off >> 2 ] =3D (UInt)vbytes;
+ } else {
+ /* Slow but general case. */
+ PROF_EVENT(232, "helperc_STOREV4-slow2");
+ mc_STOREVn_slow( aA, 4, (ULong)vbytes, False/*littleendian*/ );
+ }
+# endif
}
=20
/* ------------------------ Size =3D 2 ------------------------ */
=20
VGA_REGPARM(1)
-UWord MC_(helperc_LOADV2) ( Addr a )
+UWord MC_(helperc_LOADV2) ( Addr aA )
{
- return (UWord)mc_LOADVn_slow( a, 2, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz return mc_rd_V2_SLOWLY(a);
-//zz # else
-//zz UInt sec_no =3D rotateRight16(a) & 0x1FFFF;
-//zz SecMap* sm =3D primary_map[sec_no];
-//zz UInt a_off =3D (SM_OFF(a)) >> 3;
-//zz PROF_EVENT(62);
-//zz if (sm->abits[a_off] =3D=3D VGM_BYTE_VALID) {
-//zz /* Handle common case quickly. */
-//zz UInt v_off =3D SM_OFF(a);
-//zz return 0xFFFF0000=20
-//zz | =20
-//zz (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
-//zz } else {
-//zz /* Slow but general case. */
-//zz return mc_rd_V2_SLOWLY(a);
-//zz }
-//zz # endif
+ PROF_EVENT(240, "helperc_LOADV2");
+
+# if VG_DEBUG_MEMORY >=3D 2
+ return (UWord)mc_LOADVn_slow( aA, 2, False/*littleendian*/ );
+# else
+
+ const UWord mask =3D ~((0x10000-2) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a =3D (UWord)aA;
+
+ /* If any part of 'a' indicated by the mask is 1, either 'a' is not
+ naturally aligned, or 'a' exceeds the range covered by the
+ primary map. Either way we defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(241, "helperc_LOADV2-slow1");
+ return (UWord)mc_LOADVn_slow( aA, 2, False/*littleendian*/ );
+ }
+
+ UWord sec_no =3D (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >=3D 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm =3D primary_map[sec_no];
+ UWord v_off =3D a & 0xFFFF;
+ UWord a_off =3D v_off >> 3;
+ UWord abits =3D (UWord)(sm->abits[a_off]);
+
+ if (EXPECTED_TAKEN(abits =3D=3D VGM_BYTE_VALID)) {
+ /* Handle common case quickly: a is mapped, and the entire
+ word32 it lives in is addressible. */
+ return (~(UWord)0xFFFF)
+ |
+ (UWord)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] );
+ } else {
+ /* Slow but general case. */
+ PROF_EVENT(242, "helperc_LOADV2-slow2");
+ return (UWord)mc_LOADVn_slow( aA, 2, False/*littleendian*/ );
+ }
+
+# endif
}
=20
VGA_REGPARM(2)
void MC_(helperc_STOREV2) ( Addr a, UWord vbytes )
{
+ PROF_EVENT(250, "helperc_STOREV2");
mc_STOREVn_slow( a, 2, (ULong)vbytes, False/*littleendian*/ );
//zz # ifdef VG_DEBUG_MEMORY
//zz mc_wr_V2_SLOWLY(a, vbytes);
@@ -1451,49 +1536,91 @@
/* ------------------------ Size =3D 1 ------------------------ */
=20
VGA_REGPARM(1)
-UWord MC_(helperc_LOADV1) ( Addr a )
+UWord MC_(helperc_LOADV1) ( Addr aA )
{
+ PROF_EVENT(260, "helperc_LOADV1");
+
+# if VG_DEBUG_MEMORY >=3D 2
return (UWord)mc_LOADVn_slow( a, 1, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz return mc_rd_V1_SLOWLY(a);
-//zz # else
-//zz UInt sec_no =3D shiftRight16(a);
-//zz SecMap* sm =3D primary_map[sec_no];
-//zz UInt a_off =3D (SM_OFF(a)) >> 3;
-//zz PROF_EVENT(64);
-//zz if (sm->abits[a_off] =3D=3D VGM_BYTE_VALID) {
-//zz /* Handle common case quickly. */
-//zz UInt v_off =3D SM_OFF(a);
-//zz return 0xFFFFFF00
-//zz |
-//zz (UInt)( ((UChar*)(sm->vbyte))[ v_off ] );
-//zz } else {
-//zz /* Slow but general case. */
-//zz return mc_rd_V1_SLOWLY(a);
-//zz }
-//zz # endif
+# else
+
+ const UWord mask =3D ~((0x10000-1) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a =3D (UWord)aA;
+
+ /* If any part of 'a' indicated by the mask is 1, it means 'a'
+ exceeds the range covered by the primary map. In which case we
+ defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(261, "helperc_LOADV1-slow1");
+ return (UWord)mc_LOADVn_slow( aA, 1, False/*littleendian*/ );
+ }
+
+ UWord sec_no =3D (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >=3D 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm =3D primary_map[sec_no];
+ UWord v_off =3D a & 0xFFFF;
+ UWord a_off =3D v_off >> 3;
+ UWord abits =3D 0xFF & (UWord)(sm->abits[a_off]);
+ if (EXPECTED_TAKEN(abits =3D=3D VGM_BYTE_VALID)) {
+ /* Handle common case quickly: a is mapped, and the entire
+ word32 it lives in is addressible. */
+ return (~(UWord)0xFF)
+ |
+ (UWord)( ((UChar*)(sm->vbyte))[ v_off ] );
+ } else {
+ /* Slow but general case. */
+ PROF_EVENT(262, "helperc_LOADV1-slow2");
+ return (UWord)mc_LOADVn_slow( aA, 1, False/*littleendian*/ );
+ }
+# endif
}
=20
+
VGA_REGPARM(2)
-void MC_(helperc_STOREV1) ( Addr a, UWord vbytes )
+void MC_(helperc_STOREV1) ( Addr aA, UWord vbyte )
{
- mc_STOREVn_slow( a, 1, (ULong)vbytes, False/*littleendian*/ );
-//zz # ifdef VG_DEBUG_MEMORY
-//zz mc_wr_V1_SLOWLY(a, vbytes);
-//zz # else
-//zz UInt sec_no =3D shiftRight16(a);
-//zz SecMap* sm =3D primary_map[sec_no];
-//zz UInt a_off =3D (SM_OFF(a)) >> 3;
-//zz PROF_EVENT(65);
-//zz if (!IS_DISTINGUISHED_SM(sm) && sm->abits[a_off] =3D=3D VGM_BYTE=
_VALID) {
-//zz /* Handle common case quickly. */
-//zz UInt v_off =3D SM_OFF(a);
-//zz ((UChar*)(sm->vbyte))[ v_off ] =3D vbytes & 0x000000FF;
-//zz } else {
-//zz /* Slow but general case. */
-//zz mc_wr_V1_SLOWLY(a, vbytes);
-//zz }
-//zz # endif
+ PROF_EVENT(270, "helperc_STOREV1");
+
+# if VG_DEBUG_MEMORY >=3D 2
+ mc_STOREVn_slow( aA, 1, (ULong)vbyte, False/*littleendian*/ );
+# else
+
+ const UWord mask =3D ~((0x10000-1) | ((N_PRIMARY_MAPS-1) << 16));
+ UWord a =3D (UWord)aA;
+ /* If any part of 'a' indicated by the mask is 1, it means 'a'
+ exceeds the range covered by the primary map. In which case we
+ defer to the slow-path case. */
+ if (EXPECTED_NOT_TAKEN(a & mask)) {
+ PROF_EVENT(271, "helperc_STOREV1-slow1");
+ mc_STOREVn_slow( aA, 1, (ULong)vbyte, False/*littleendian*/ );
+ return;
+ }
+
+ UWord sec_no =3D (UWord)(a >> 16);
+
+# if VG_DEBUG_MEMORY >=3D 1
+ tl_assert(sec_no < N_PRIMARY_MAPS);
+# endif
+
+ SecMap* sm =3D primary_map[sec_no];
+ UWord v_off =3D a & 0xFFFF;
+ UWord a_off =3D v_off >> 3;
+ UWord abits =3D 0xFF & (UWord)(sm->abits[a_off]);
+ if (EXPECTED_TAKEN(!is_distinguished_sm(sm)=20
+ && abits =3D=3D VGM_BYTE_VALID)) {
+ /* Handle common case quickly: a is mapped, the entire word32 it
+ lives in is addressible. */
+ ((UChar*)(sm->vbyte))[ v_off ] =3D (UChar)vbyte;
+ } else {
+ PROF_EVENT(272, "helperc_STOREV1-slow2");
+ mc_STOREVn_slow( aA, 1, (ULong)vbyte, False/*littleendian*/ );
+ }
+
+# endif
}
=20
=20
@@ -1958,6 +2085,7 @@
Bool TL_(cheap_sanity_check) ( void )
{
/* nothing useful we can rapidly check */
+ PROF_EVENT(490, "cheap_sanity_check");
return True;
}
=20
@@ -1966,6 +2094,8 @@
Int i;
SecMap* sm;
=20
+ PROF_EVENT(491, "expensive_sanity_check");
+
/* Check the 3 distinguished SMs. */
=20
/* Check A invalid, V invalid. */
@@ -2395,6 +2525,8 @@
=20
init_shadow_memory();
MAC_(common_pre_clo_init)();
+
+ tl_assert( TL_(expensive_sanity_check)() );
}
=20
void TL_(post_clo_init) ( void )
|