|
From: <sv...@va...> - 2011-06-13 13:25:01
|
Author: sewardj
Date: 2011-06-13 14:14:00 +0100 (Mon, 13 Jun 2011)
New Revision: 11812
Log:
Try to handle LL caches which are of size 50% above a power of 2 (eg,
6MB, 12MB) and have a non-power-of-2 number of sets.
Modified:
trunk/cachegrind/cg-x86-amd64.c
trunk/coregrind/m_libcbase.c
trunk/include/pub_tool_libcbase.h
Modified: trunk/cachegrind/cg-x86-amd64.c
===================================================================
--- trunk/cachegrind/cg-x86-amd64.c 2011-06-10 20:29:27 UTC (rev 11811)
+++ trunk/cachegrind/cg-x86-amd64.c 2011-06-13 13:14:00 UTC (rev 11812)
@@ -464,6 +464,59 @@
D1c->size *= 1024;
LLc->size *= 1024;
+ /* If the LL cache config isn't something the simulation functions
+ can handle, try to adjust it so it is. Caches are characterised
+ by (total size T, line size L, associativity A), and then we
+ have
+
+ number of sets S = T / (L * A)
+
+ The required constraints are:
+
+ * L must be a power of 2, but it always is in practice, so
+ no problem there
+
+ * A can be any value >= 1
+
+ * T can be any value, but ..
+
+ * S must be a power of 2.
+
+ That sometimes gives a problem. For example, some Core iX based
+ Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
+ sets. The "fix" in this case is to increase the associativity
+ by 50% to 24, which reduces the number of sets to 8192, making
+ it a power of 2. That's what the following code does (handling
+ the "3/2 rescaling case"). We might need to deal with other
+ ratios later (5/4 ?).
+
+ The "fix" is "justified" (cough, cough) by alleging that
+ increases of associativity above about 4 have very little effect
+ on the actual miss rate. It would be far more inaccurate to
+ fudge this by changing the size of the simulated cache --
+ changing the associativity is a much better option.
+ */
+ if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) {
+ Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc);
+ if (/* stay sane */
+ nSets >= 4
+ /* nSets is not a power of 2 */
+ && VG_(log2_64)( (ULong)nSets ) == -1
+ /* nSets is 50% above a power of 2 */
+ && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1
+ /* associativity can be increased by exactly 50% */
+ && (LLc->assoc % 2) == 0
+ ) {
+ /* # sets is 1.5 * a power of two, and the associativity is
+ even, so we can increase the associativity by exactly 50% and
+ implicitly scale the # sets down to that power of two. */
+ Int new_assoc = LLc->assoc + (LLc->assoc / 2);
+ VG_(dmsg)("warning: pretending that LL cache has associativity"
+ " %d instead of actual %d\n", new_assoc, LLc->assoc);
+ LLc->assoc = new_assoc;
+ }
+ }
+
return ret;
}
Modified: trunk/coregrind/m_libcbase.c
===================================================================
--- trunk/coregrind/m_libcbase.c 2011-06-10 20:29:27 UTC (rev 11811)
+++ trunk/coregrind/m_libcbase.c 2011-06-13 13:14:00 UTC (rev 11812)
@@ -794,6 +794,15 @@
return -1;
}
+/* Returns log2(x) for 64 bit x, or -1 if x is not a power of 2 (including 0). */
+Int VG_(log2_64) ( ULong x )
+{
+ Int i;
+ for (i = 0; i < 64; i++) {
+ if ((1ULL << i) == x) return i;
+ }
+ return -1;
+}
// Generic quick sort.
void VG_(ssort)( void* base, SizeT nmemb, SizeT size,
Modified: trunk/include/pub_tool_libcbase.h
===================================================================
--- trunk/include/pub_tool_libcbase.h 2011-06-10 20:29:27 UTC (rev 11811)
+++ trunk/include/pub_tool_libcbase.h 2011-06-13 13:14:00 UTC (rev 11812)
@@ -181,10 +181,13 @@
extern void VG_(ssort)( void* base, SizeT nmemb, SizeT size,
Int (*compar)(void*, void*) );
-/* Returns the base-2 logarithm of x. Returns -1 if x is not a power
- of two. Nb: VG_(log2)(1) == 0. */
+/* Returns the base-2 logarithm of a 32 bit unsigned number. Returns
+ -1 if it is not a power of two. Nb: VG_(log2)(1) == 0. */
extern Int VG_(log2) ( UInt x );
+/* Ditto for 64 bit unsigned numbers. */
+extern Int VG_(log2_64)( ULong x );
+
// A pseudo-random number generator returning a random UInt. If pSeed
// is NULL, it uses its own seed, which starts at zero. If pSeed is
// non-NULL, it uses and updates whatever pSeed points at.
|