|
From: <sv...@va...> - 2012-06-03 22:40:19
|
sewardj 2012-06-03 23:40:07 +0100 (Sun, 03 Jun 2012)
New Revision: 12605
Log:
m_machine: add new function VG_(machine_get_size_of_largest_guest_register)
cachegrind: use the new function to abort startup if the minimum line
size is smaller than the size of the largest guest register.
Partially derived from a patch by Josef Weidendorfer.
Modified files:
trunk/cachegrind/cg_main.c
trunk/coregrind/m_machine.c
trunk/include/pub_tool_machine.h
Modified: trunk/include/pub_tool_machine.h (+6 -0)
===================================================================
--- trunk/include/pub_tool_machine.h 2012-06-03 23:37:15 +01:00 (rev 12604)
+++ trunk/include/pub_tool_machine.h 2012-06-03 23:40:07 +01:00 (rev 12605)
@@ -152,6 +152,12 @@
// ppc64-linux it isn't (sigh).
extern void* VG_(fnptr_to_fnentry)( void* );
+/* Returns the size of the largest guest register that we will
+ simulate in this run. This depends on both the guest architecture
+ and on the specific capabilities we are simulating for that guest
+ (eg, AVX or non-AVX ?, for amd64). */
+extern Int VG_(machine_get_size_of_largest_guest_register) ( void );
+
#endif // __PUB_TOOL_MACHINE_H
/*--------------------------------------------------------------------*/
Modified: trunk/cachegrind/cg_main.c (+32 -6)
===================================================================
--- trunk/cachegrind/cg_main.c 2012-06-03 23:37:15 +01:00 (rev 12604)
+++ trunk/cachegrind/cg_main.c 2012-06-03 23:40:07 +01:00 (rev 12605)
@@ -70,6 +70,12 @@
static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
/*------------------------------------------------------------*/
+/*--- Cachesim configuration ---*/
+/*------------------------------------------------------------*/
+
+static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
+
+/*------------------------------------------------------------*/
/*--- Types and Data Structures ---*/
/*------------------------------------------------------------*/
@@ -846,7 +852,7 @@
{
Event* evt;
tl_assert(isIRAtom(ea));
- tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+ tl_assert(datasize >= 1 && datasize <= min_line_size);
if (!clo_cache_sim)
return;
if (cgs->events_used == N_EVENTS)
@@ -868,7 +874,7 @@
Event* evt;
tl_assert(isIRAtom(ea));
- tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+ tl_assert(datasize >= 1 && datasize <= min_line_size);
if (!clo_cache_sim)
return;
@@ -1058,8 +1064,8 @@
// instructions will be done inaccurately, but they're
// very rare and this avoids errors from hitting more
// than two cache lines in the simulation.
- if (dataSize > MIN_LINE_SIZE)
- dataSize = MIN_LINE_SIZE;
+ if (dataSize > min_line_size)
+ dataSize = min_line_size;
if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
@@ -1085,8 +1091,8 @@
if (cas->dataHi != NULL)
dataSize *= 2; /* since it's a doubleword-CAS */
/* I don't think this can ever happen, but play safe. */
- if (dataSize > MIN_LINE_SIZE)
- dataSize = MIN_LINE_SIZE;
+ if (dataSize > min_line_size)
+ dataSize = min_line_size;
addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
break;
@@ -1724,6 +1730,26 @@
&clo_D1_cache,
&clo_LL_cache);
+ // min_line_size is used to make sure that we never feed
+ // accesses to the simulator straddling more than two
+ // cache lines at any cache level
+ min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
+ min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
+
+ Int largest_load_or_store_size
+ = VG_(machine_get_size_of_largest_guest_register)();
+ if (min_line_size < largest_load_or_store_size) {
+ /* We can't continue, because the cache simulation might
+ straddle more than 2 lines, and it will assert. So let's
+ just stop before we start. */
+ VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
+ (Int)min_line_size);
+ VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
+ largest_load_or_store_size );
+ VG_(umsg)(" but it is not. Exiting now.\n");
+ VG_(exit)(1);
+ }
+
cachesim_I1_initcache(I1c);
cachesim_D1_initcache(D1c);
cachesim_LL_initcache(LLc);
Modified: trunk/coregrind/m_machine.c (+61 -1)
===================================================================
--- trunk/coregrind/m_machine.c 2012-06-03 23:37:15 +01:00 (rev 12604)
+++ trunk/coregrind/m_machine.c 2012-06-03 23:40:07 +01:00 (rev 12605)
@@ -367,7 +367,7 @@
static Bool hwcaps_done = False;
/* --- all archs --- */
-static VexArch va;
+static VexArch va = VexArch_INVALID;
static VexArchInfo vai;
#if defined(VGA_x86)
@@ -1316,6 +1316,66 @@
}
+/* Returns the size of the largest guest register that we will
+ simulate in this run. This depends on both the guest architecture
+ and on the specific capabilities we are simulating for that guest
+ (eg, AVX or non-AVX ?, for amd64). Should return either 4, 8, 16
+ or 32. General rule: if in doubt, return a value larger than
+ reality.
+
+ This information is needed by Cachegrind and Callgrind to decide
+ the minimum cache line size they are prepared to simulate.
+ Basically, they require that the minimum cache line size is at least as
+ large as the largest register that might get transferred to/from
+ memory, so as to guarantee that any such transaction can straddle
+ at most 2 cache lines.
+*/
+Int VG_(machine_get_size_of_largest_guest_register) ( void )
+{
+ vg_assert(hwcaps_done);
+ /* Once hwcaps_done is True, we can fish around inside va/vai to
+ find the information we need. */
+
+# if defined(VGA_x86)
+ vg_assert(va == VexArchX86);
+ /* We don't support AVX, so 32 is out. At the other end, even if
+ we don't support any SSE, the X87 can generate 10 byte
+ transfers, so let's say 16 to be on the safe side. Hence the
+ answer is always 16. */
+ return 16;
+
+# elif defined(VGA_amd64)
+ /* if AVX then 32 else 16 */
+ return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
+
+# elif defined(VGA_ppc32)
+ /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+ if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
+ return 8;
+
+# elif defined(VGA_ppc64)
+ /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+ if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
+ return 8;
+
+# elif defined(VGA_s390x)
+ return 8;
+
+# elif defined(VGA_arm)
+ /* Really it depends whether or not we have NEON, but let's just
+ assume we always do. */
+ return 16;
+
+# else
+# error "Unknown arch"
+# endif
+}
+
+
// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
|