|
From: <sv...@va...> - 2009-01-29 10:14:59
|
Author: sewardj
Date: 2009-01-29 10:14:53 +0000 (Thu, 29 Jan 2009)
New Revision: 9088
Log:
Merge in non-power-of-2-sized cache simulation fixes for Callgrind
and Cachegrind. Revisions merged:

r8912:
  callgrind/tests/filter_stderr
  cachegrind/tests/filter_stderr
    Filter out an additional warning, so the tests pass on
    machines with a 6MB L2 cache.
  cachegrind/cg-x86.c
  cachegrind/cg-amd64.c
    These two files were almost identical. cg-amd64.c now just
    #includes cg-x86.c.

r9080:
  Cachegrind/Callgrind: allow for cache sizes other than only
  powers of two.

r9081:
  Callgrind: Remove ifdef'ed-out, non-working code.
Added:
branches/VALGRIND_3_4_BRANCH/cachegrind/tests/notpower2.stderr.exp
branches/VALGRIND_3_4_BRANCH/cachegrind/tests/notpower2.vgtest
branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-hwpref.stderr.exp
branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-hwpref.vgtest
branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-use.stderr.exp
branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-use.vgtest
branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-wb.stderr.exp
branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-wb.vgtest
branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2.stderr.exp
branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2.vgtest
Modified:
branches/VALGRIND_3_4_BRANCH/cachegrind/cg-amd64.c
branches/VALGRIND_3_4_BRANCH/cachegrind/cg-x86.c
branches/VALGRIND_3_4_BRANCH/cachegrind/cg_main.c
branches/VALGRIND_3_4_BRANCH/cachegrind/cg_sim.c
branches/VALGRIND_3_4_BRANCH/cachegrind/docs/cg-manual.xml
branches/VALGRIND_3_4_BRANCH/cachegrind/tests/Makefile.am
branches/VALGRIND_3_4_BRANCH/callgrind/sim.c
branches/VALGRIND_3_4_BRANCH/callgrind/tests/Makefile.am
Modified: branches/VALGRIND_3_4_BRANCH/cachegrind/cg-amd64.c
===================================================================
--- branches/VALGRIND_3_4_BRANCH/cachegrind/cg-amd64.c 2009-01-29 09:57:22 UTC (rev 9087)
+++ branches/VALGRIND_3_4_BRANCH/cachegrind/cg-amd64.c 2009-01-29 10:14:53 UTC (rev 9088)
@@ -28,340 +28,8 @@
The GNU General Public License is contained in the file COPYING.
*/
-#include "pub_tool_basics.h"
-#include "pub_tool_cpuid.h"
-#include "pub_tool_libcbase.h"
-#include "pub_tool_libcassert.h"
-#include "pub_tool_libcprint.h"
+#include "cg-x86.c"
-#include "cg_arch.h"
-
-// All CPUID info taken from sandpile.org/a32/cpuid.htm */
-// Probably only works for Intel and AMD chips, and probably only for some of
-// them.
-
-static void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
-{
- VG_(message)(Vg_DebugMsg,
- "warning: Pentium 4 with %d KB micro-op instruction trace cache",
- actual_size);
- VG_(message)(Vg_DebugMsg,
- " Simulating a %d KB I-cache with %d B lines",
- used_size, line_size);
-}
-
-/* Intel method is truly wretched. We have to do an insane indexing into an
- * array of pre-defined configurations for various parts of the memory
- * hierarchy.
- * According to Intel Processor Identification, App Note 485.
- */
-static
-Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
-{
- Int cpuid1_eax;
- Int cpuid1_ignore;
- Int family;
- Int model;
- UChar info[16];
- Int i, trials;
- Bool L2_found = False;
-
- if (level < 2) {
- VG_(message)(Vg_DebugMsg,
- "warning: CPUID level < 2 for Intel processor (%d)",
- level);
- return -1;
- }
-
- /* family/model needed to distinguish code reuse (currently 0x49) */
- VG_(cpuid)(1, &cpuid1_eax, &cpuid1_ignore,
- &cpuid1_ignore, &cpuid1_ignore);
- family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
- model = (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
-
- VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4],
- (Int*)&info[8], (Int*)&info[12]);
- trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
- info[0] = 0x0; /* reset AL */
-
- if (0 != trials) {
- VG_(message)(Vg_DebugMsg,
- "warning: non-zero CPUID trials for Intel processor (%d)",
- trials);
- return -1;
- }
-
- for (i = 0; i < 16; i++) {
-
- switch (info[i]) {
-
- case 0x0: /* ignore zeros */
- break;
-
- /* TLB info, ignore */
- case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
- case 0x4f: case 0x50: case 0x51: case 0x52:
- case 0x56: case 0x57: case 0x59:
- case 0x5b: case 0x5c: case 0x5d:
- case 0xb0: case 0xb1:
- case 0xb3: case 0xb4: case 0xba: case 0xc0:
- break;
-
- case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break;
- case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
- case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
-
- case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
- case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
- case 0x0e:
- /* Real D1 cache configuration is:
- D1c = (cache_t) { 24, 6, 64 }; */
- VG_(message)(Vg_DebugMsg, "warning: 24Kb D1 cache detected, treating as 16Kb");
- *D1c = (cache_t) { 16, 4, 64 };
- break;
- case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
-
- /* IA-64 info -- panic! */
- case 0x10: case 0x15: case 0x1a:
- case 0x88: case 0x89: case 0x8a: case 0x8d:
- case 0x90: case 0x96: case 0x9b:
- VG_(tool_panic)("IA-64 cache detected?!");
-
- case 0x22: case 0x23: case 0x25: case 0x29:
- case 0x46: case 0x47: case 0x4a: case 0x4b: case 0x4c: case 0x4d:
- VG_(message)(Vg_DebugMsg,
- "warning: L3 cache detected but ignored");
- break;
-
- /* These are sectored, whatever that means */
- case 0x39: *L2c = (cache_t) { 128, 4, 64 }; L2_found = True; break;
- case 0x3c: *L2c = (cache_t) { 256, 4, 64 }; L2_found = True; break;
-
- /* If a P6 core, this means "no L2 cache".
- If a P4 core, this means "no L3 cache".
- We don't know what core it is, so don't issue a warning. To detect
- a missing L2 cache, we use 'L2_found'. */
- case 0x40:
- break;
-
- case 0x41: *L2c = (cache_t) { 128, 4, 32 }; L2_found = True; break;
- case 0x42: *L2c = (cache_t) { 256, 4, 32 }; L2_found = True; break;
- case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
- case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
- case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
- case 0x48:
- /* Real L2 cache configuration is:
- *L2c = (cache_t) { 3072, 12, 64 }; L2_found = True; */
- VG_(message)(Vg_DebugMsg, "warning: 3Mb L2 cache detected, treating as 2Mb");
- *L2c = (cache_t) { 2048, 8, 64 }; L2_found = True;
- break;
- case 0x49:
- if ((family == 15) && (model == 6))
- /* On Xeon MP (family F, model 6), this is for L3 */
- VG_(message)(Vg_DebugMsg,
- "warning: L3 cache detected but ignored");
- else
- *L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
- break;
- case 0x4e:
- /* Real L2 cache configuration is:
- *L2c = (cache_t) { 6144, 24, 64 }; L2_found = True; */
- VG_(message)(Vg_DebugMsg, "warning: 6Mb L2 cache detected, treating as 4Mb");
- *L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
- break;
-
- /* These are sectored, whatever that means */
- case 0x60: *D1c = (cache_t) { 16, 8, 64 }; break; /* sectored */
- case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */
- case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */
- case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */
-
- /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
- * conversion to byte size is a total guess; treat the 12K and 16K
- * cases the same since the cache byte size must be a power of two for
- * everything to work!. Also guessing 32 bytes for the line size...
- */
- case 0x70: /* 12K micro-ops, 8-way */
- *I1c = (cache_t) { 16, 8, 32 };
- micro_ops_warn(12, 16, 32);
- break;
- case 0x71: /* 16K micro-ops, 8-way */
- *I1c = (cache_t) { 16, 8, 32 };
- micro_ops_warn(16, 16, 32);
- break;
- case 0x72: /* 32K micro-ops, 8-way */
- *I1c = (cache_t) { 32, 8, 32 };
- micro_ops_warn(32, 32, 32);
- break;
-
- /* These are sectored, whatever that means */
- case 0x79: *L2c = (cache_t) { 128, 8, 64 }; L2_found = True; break;
- case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; L2_found = True; break;
- case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
- case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
- case 0x7d: *L2c = (cache_t) { 2048, 8, 64 }; L2_found = True; break;
- case 0x7e: *L2c = (cache_t) { 256, 8, 128 }; L2_found = True; break;
-
- case 0x7f: *L2c = (cache_t) { 512, 2, 64 }; L2_found = True; break;
- case 0x80: *L2c = (cache_t) { 512, 8, 64 }; L2_found = True; break;
-
- case 0x81: *L2c = (cache_t) { 128, 8, 32 }; L2_found = True; break;
- case 0x82: *L2c = (cache_t) { 256, 8, 32 }; L2_found = True; break;
- case 0x83: *L2c = (cache_t) { 512, 8, 32 }; L2_found = True; break;
- case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; L2_found = True; break;
- case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; L2_found = True; break;
- case 0x86: *L2c = (cache_t) { 512, 4, 64 }; L2_found = True; break;
- case 0x87: *L2c = (cache_t) { 1024, 8, 64 }; L2_found = True; break;
-
- /* Ignore prefetch information */
- case 0xf0: case 0xf1:
- break;
-
- default:
- VG_(message)(Vg_DebugMsg,
- "warning: Unknown Intel cache config value "
- "(0x%x), ignoring", info[i]);
- break;
- }
- }
-
- if (!L2_found)
- VG_(message)(Vg_DebugMsg,
- "warning: L2 cache not installed, ignore L2 results.");
-
- return 0;
-}
-
-/* AMD method is straightforward, just extract appropriate bits from the
- * result registers.
- *
- * Bits, for D1 and I1:
- * 31..24 data L1 cache size in KBs
- * 23..16 data L1 cache associativity (FFh=full)
- * 15.. 8 data L1 cache lines per tag
- * 7.. 0 data L1 cache line size in bytes
- *
- * Bits, for L2:
- * 31..16 unified L2 cache size in KBs
- * 15..12 unified L2 cache associativity (0=off, FFh=full)
- * 11.. 8 unified L2 cache lines per tag
- * 7.. 0 unified L2 cache line size in bytes
- *
- * #3 The AMD K7 processor's L2 cache must be configured prior to relying
- * upon this information. (Whatever that means -- njn)
- *
- * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
- * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
- * so we detect that.
- *
- * Returns 0 on success, non-zero on failure.
- */
-static
-Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c)
-{
- UInt ext_level;
- UInt dummy, model;
- UInt I1i, D1i, L2i;
-
- VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy);
-
- if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
- VG_(message)(Vg_UserMsg,
- "warning: ext_level < 0x80000006 for AMD processor (0x%x)",
- ext_level);
- return -1;
- }
-
- VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i);
- VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &dummy);
-
- VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy);
-
- /* Check for Duron bug */
- if (model == 0x630) {
- VG_(message)(Vg_UserMsg,
- "Buggy Duron stepping A0. Assuming L2 size=65536 bytes");
- L2i = (64 << 16) | (L2i & 0xffff);
- }
-
- D1c->size = (D1i >> 24) & 0xff;
- D1c->assoc = (D1i >> 16) & 0xff;
- D1c->line_size = (D1i >> 0) & 0xff;
-
- I1c->size = (I1i >> 24) & 0xff;
- I1c->assoc = (I1i >> 16) & 0xff;
- I1c->line_size = (I1i >> 0) & 0xff;
-
- L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
- L2c->assoc = (L2i >> 12) & 0xf;
- L2c->line_size = (L2i >> 0) & 0xff;
-
- return 0;
-}
-
-static
-Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c)
-{
- Int level, ret;
- Char vendor_id[13];
-
- if (!VG_(has_cpuid)()) {
- VG_(message)(Vg_DebugMsg, "CPUID instruction not supported");
- return -1;
- }
-
- VG_(cpuid)(0, &level, (int*)&vendor_id[0],
- (int*)&vendor_id[8], (int*)&vendor_id[4]);
- vendor_id[12] = '\0';
-
- if (0 == level) {
- VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?");
- return -1;
- }
-
- /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
- if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
- ret = Intel_cache_info(level, I1c, D1c, L2c);
-
- } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
- ret = AMD_cache_info(I1c, D1c, L2c);
-
- } else {
- VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)",
- vendor_id);
- return -1;
- }
-
- /* Successful! Convert sizes from KB to bytes */
- I1c->size *= 1024;
- D1c->size *= 1024;
- L2c->size *= 1024;
-
- return ret;
-}
-
-
-void VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* L2c,
- Bool all_caches_clo_defined)
-{
- Int res;
-
- // Set caches to default.
- *I1c = (cache_t) { 65536, 2, 64 };
- *D1c = (cache_t) { 65536, 2, 64 };
- *L2c = (cache_t) { 262144, 8, 64 };
-
- // Then replace with any info we can get from CPUID.
- res = get_caches_from_CPUID(I1c, D1c, L2c);
-
- // Warn if CPUID failed and config not completely specified from cmd line.
- if (res != 0 && !all_caches_clo_defined) {
- VG_(message)(Vg_DebugMsg,
- "Warning: Couldn't auto-detect cache config, using one "
- "or more defaults ");
- }
-}
-
/*--------------------------------------------------------------------*/
/*--- end ---*/
/*--------------------------------------------------------------------*/
Modified: branches/VALGRIND_3_4_BRANCH/cachegrind/cg-x86.c
===================================================================
--- branches/VALGRIND_3_4_BRANCH/cachegrind/cg-x86.c 2009-01-29 09:57:22 UTC (rev 9087)
+++ branches/VALGRIND_3_4_BRANCH/cachegrind/cg-x86.c 2009-01-29 10:14:53 UTC (rev 9088)
@@ -1,6 +1,6 @@
/*--------------------------------------------------------------------*/
-/*--- x86-specific definitions. cg-x86.c ---*/
+/*--- x86-specific (and AMD64-specific) definitions. cg-x86.c ---*/
/*--------------------------------------------------------------------*/
/*
@@ -113,12 +113,7 @@
case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break;
case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
- case 0x0e:
- /* Real D1 cache configuration is:
- D1c = (cache_t) { 24, 6, 64 }; */
- VG_(message)(Vg_DebugMsg, "warning: 24Kb D1 cache detected, treating as 16Kb");
- *D1c = (cache_t) { 16, 4, 64 };
- break;
+ case 0x0e: *D1c = (cache_t) { 24, 6, 64 }; break;
case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
/* IA-64 info -- panic! */
@@ -149,12 +144,7 @@
case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
- case 0x48:
- /* Real L2 cache configuration is:
- *L2c = (cache_t) { 3072, 12, 64 }; L2_found = True; */
- VG_(message)(Vg_DebugMsg, "warning: 3Mb L2 cache detected, treating as 2Mb");
- *L2c = (cache_t) { 2048, 8, 64 }; L2_found = True;
- break;
+ case 0x48: *L2c = (cache_t) { 3072,12, 64 }; L2_found = True; break;
case 0x49:
if ((family == 15) && (model == 6))
/* On Xeon MP (family F, model 6), this is for L3 */
@@ -163,12 +153,7 @@
else
*L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
break;
- case 0x4e:
- /* Real L2 cache configuration is:
- *L2c = (cache_t) { 6144, 24, 64 }; L2_found = True; */
- VG_(message)(Vg_DebugMsg, "warning: 6Mb L2 cache detected, treating as 4Mb");
- *L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
- break;
+ case 0x4e: *L2c = (cache_t) { 6144, 24, 64 }; L2_found = True; break;
/* These are sectored, whatever that means */
case 0x60: *D1c = (cache_t) { 16, 8, 64 }; break; /* sectored */
Modified: branches/VALGRIND_3_4_BRANCH/cachegrind/cg_main.c
===================================================================
--- branches/VALGRIND_3_4_BRANCH/cachegrind/cg_main.c 2009-01-29 09:57:22 UTC (rev 9087)
+++ branches/VALGRIND_3_4_BRANCH/cachegrind/cg_main.c 2009-01-29 10:14:53 UTC (rev 9088)
@@ -1158,21 +1158,15 @@
static
void check_cache(cache_t* cache, Char *name)
{
- /* First check they're all powers of two */
- if (-1 == VG_(log2)(cache->size)) {
+ /* Simulator requires line size and set count to be powers of two */
+ if (( cache->size % (cache->line_size * cache->assoc) != 0) ||
+ (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc))) {
VG_(message)(Vg_UserMsg,
- "error: %s size of %dB not a power of two; aborting.",
- name, cache->size);
+ "error: %s set count not a power of two; aborting.",
+ name);
VG_(exit)(1);
}
- if (-1 == VG_(log2)(cache->assoc)) {
- VG_(message)(Vg_UserMsg,
- "error: %s associativity of %d not a power of two; aborting.",
- name, cache->assoc);
- VG_(exit)(1);
- }
-
if (-1 == VG_(log2)(cache->line_size)) {
VG_(message)(Vg_UserMsg,
"error: %s line size of %dB not a power of two; aborting.",
Modified: branches/VALGRIND_3_4_BRANCH/cachegrind/cg_sim.c
===================================================================
--- branches/VALGRIND_3_4_BRANCH/cachegrind/cg_sim.c 2009-01-29 09:57:22 UTC (rev 9087)
+++ branches/VALGRIND_3_4_BRANCH/cachegrind/cg_sim.c 2009-01-29 10:14:53 UTC (rev 9088)
@@ -44,7 +44,6 @@
Int line_size; /* bytes */
Int sets;
Int sets_min_1;
- Int assoc_bits;
Int line_size_bits;
Int tag_shift;
Char desc_line[128];
@@ -62,7 +61,6 @@
c->sets = (c->size / c->line_size) / c->assoc;
c->sets_min_1 = c->sets - 1;
- c->assoc_bits = VG_(log2)(c->assoc);
c->line_size_bits = VG_(log2)(c->line_size);
c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
@@ -111,8 +109,7 @@
/* First case: word entirely within line. */ \
if (set1 == set2) { \
\
- /* Shifting is a bit faster than multiplying */ \
- set = &(L.tags[set1 << L.assoc_bits]); \
+ set = &(L.tags[set1 * L.assoc]); \
\
/* This loop is unrolled for just the first case, which is the most */\
/* common. We can't unroll any further because it would screw up */\
@@ -143,7 +140,7 @@
/* Second case: word straddles two lines. */ \
/* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
} else if (((set1 + 1) & (L.sets-1)) == set2) { \
- set = &(L.tags[set1 << L.assoc_bits]); \
+ set = &(L.tags[set1 * L.assoc]); \
if (tag == set[0]) { \
goto block2; \
} \
@@ -162,7 +159,7 @@
set[0] = tag; \
is_miss = True; \
block2: \
- set = &(L.tags[set2 << L.assoc_bits]); \
+ set = &(L.tags[set2 * L.assoc]); \
tag2 = (a+size-1) >> L.tag_shift; \
if (tag2 == set[0]) { \
goto miss_treatment; \
Modified: branches/VALGRIND_3_4_BRANCH/cachegrind/docs/cg-manual.xml
===================================================================
--- branches/VALGRIND_3_4_BRANCH/cachegrind/docs/cg-manual.xml 2009-01-29 09:57:22 UTC (rev 9087)
+++ branches/VALGRIND_3_4_BRANCH/cachegrind/docs/cg-manual.xml 2009-01-29 10:14:53 UTC (rev 9088)
@@ -142,7 +142,7 @@
</listitem>
<listitem>
- <para>Bit-selection hash function: the line(s) in the cache
+ <para>Bit-selection hash function: the set of line(s) in the cache
to which a memory block maps is chosen by the middle bits
M--(M+N-1) of the byte address, where:</para>
<itemizedlist>
@@ -150,15 +150,17 @@
<para>line size = 2^M bytes</para>
</listitem>
<listitem>
- <para>(cache size / line size) = 2^N bytes</para>
+ <para>(cache size / line size / associativity) = 2^N bytes</para>
</listitem>
</itemizedlist>
</listitem>
<listitem>
- <para>Inclusive L2 cache: the L2 cache replicates all the
- entries of the L1 cache. This is standard on Pentium chips,
- but AMD Opterons, Athlons and Durons
+ <para>Inclusive L2 cache: the L2 cache typically replicates all
+ the entries of the L1 caches, because fetching into L1 involves
+ fetching into L2 first (this does not guarantee strict inclusiveness,
+ as lines evicted from L2 still could reside in L1). This is
+ standard on Pentium chips, but AMD Opterons, Athlons and Durons
use an exclusive L2 cache that only holds
blocks evicted from L1. Ditto most modern VIA CPUs.</para>
</listitem>
@@ -176,7 +178,10 @@
(I1/D1/L2) of the cache from the command line using the
<computeroutput>--I1</computeroutput>,
<computeroutput>--D1</computeroutput> and
-<computeroutput>--L2</computeroutput> options.</para>
+<computeroutput>--L2</computeroutput> options.
+For cache parameters to be valid for simulation, the number
+of sets (with associativity being the number of cache lines in
+each set) has to be a power of two.</para>
<para>On PowerPC platforms
Cachegrind cannot automatically
@@ -227,10 +232,7 @@
<para>If you are interested in simulating a cache with different
properties, it is not particularly hard to write your own cache
simulator, or to modify the existing ones in
-<computeroutput>vg_cachesim_I1.c</computeroutput>,
-<computeroutput>vg_cachesim_D1.c</computeroutput>,
-<computeroutput>vg_cachesim_L2.c</computeroutput> and
-<computeroutput>vg_cachesim_gen.c</computeroutput>. We'd be
+<computeroutput>cg_sim.c</computeroutput>. We'd be
interested to hear from anyone who does.</para>
</sect2>
Modified: branches/VALGRIND_3_4_BRANCH/cachegrind/tests/Makefile.am
===================================================================
--- branches/VALGRIND_3_4_BRANCH/cachegrind/tests/Makefile.am 2009-01-29 09:57:22 UTC (rev 9087)
+++ branches/VALGRIND_3_4_BRANCH/cachegrind/tests/Makefile.am 2009-01-29 10:14:53 UTC (rev 9088)
@@ -24,6 +24,7 @@
chdir.vgtest chdir.stderr.exp \
clreq.vgtest clreq.stderr.exp \
dlclose.vgtest dlclose.stderr.exp dlclose.stdout.exp \
+ notpower2.vgtest notpower2.stderr.exp \
wrap5.vgtest wrap5.stderr.exp wrap5.stdout.exp
check_PROGRAMS = \
Copied: branches/VALGRIND_3_4_BRANCH/cachegrind/tests/notpower2.stderr.exp (from rev 9080, trunk/cachegrind/tests/notpower2.stderr.exp)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/cachegrind/tests/notpower2.stderr.exp (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/cachegrind/tests/notpower2.stderr.exp 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,17 @@
+
+
+I refs:
+I1 misses:
+L2i misses:
+I1 miss rate:
+L2i miss rate:
+
+D refs:
+D1 misses:
+L2d misses:
+D1 miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
Copied: branches/VALGRIND_3_4_BRANCH/cachegrind/tests/notpower2.vgtest (from rev 9080, trunk/cachegrind/tests/notpower2.vgtest)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/cachegrind/tests/notpower2.vgtest (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/cachegrind/tests/notpower2.vgtest 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,3 @@
+prog: ../../tests/true
+vgopts: --I1=32768,8,64 --D1=24576,6,64 --L2=3145728,12,64
+cleanup: rm cachegrind.out.*
Modified: branches/VALGRIND_3_4_BRANCH/callgrind/sim.c
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/sim.c 2009-01-29 09:57:22 UTC (rev 9087)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/sim.c 2009-01-29 10:14:53 UTC (rev 9088)
@@ -74,7 +74,6 @@
Bool sectored; /* prefetch nearside cacheline on read */
int sets;
int sets_min_1;
- int assoc_bits;
int line_size_bits;
int tag_shift;
UWord tag_mask;
@@ -195,7 +194,6 @@
c->sets = (c->size / c->line_size) / c->assoc;
c->sets_min_1 = c->sets - 1;
- c->assoc_bits = VG_(log2)(c->assoc);
c->line_size_bits = VG_(log2)(c->line_size);
c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
c->tag_mask = ~((1<<c->tag_shift)-1);
@@ -259,8 +257,7 @@
int i, j;
UWord *set;
- /* Shifting is a bit faster than multiplying */
- set = &(c->tags[set_no << c->assoc_bits]);
+ set = &(c->tags[set_no * c->assoc]);
/* This loop is unrolled for just the first case, which is the most */
/* common. We can't unroll any further because it would screw up */
@@ -359,8 +356,7 @@
int i, j;
UWord *set, tmp_tag;
- /* Shifting is a bit faster than multiplying */
- set = &(c->tags[set_no << c->assoc_bits]);
+ set = &(c->tags[set_no * c->assoc]);
/* This loop is unrolled for just the first case, which is the most */
/* common. We can't unroll any further because it would screw up */
@@ -407,7 +403,7 @@
/* Access straddles two lines. */
/* Nb: this is a fast way of doing ((set1+1) % c->sets) */
else if (((set1 + 1) & (c->sets-1)) == set2) {
- UWord tag2 = (a+size-1) >> c->tag_shift;
+ UWord tag2 = (a+size-1) & c->tag_mask;
/* the call updates cache structures as side effect */
CacheResult res1 = cachesim_setref_wb(c, ref, set1, tag);
@@ -676,7 +672,7 @@
/* We use lower tag bits as offset pointers to cache use info.
* I.e. some cache parameters don't work.
*/
- if (c->tag_shift < c->assoc_bits) {
+ if ( (1<<c->tag_shift) < c->assoc) {
VG_(message)(Vg_DebugMsg,
"error: Use associativity < %d for cache use statistics!",
(1<<c->tag_shift) );
@@ -684,103 +680,7 @@
}
}
-/* FIXME: A little tricky */
-#if 0
-static __inline__
-void cacheuse_update_hit(cache_t2* c, UInt high_idx, UInt low_idx, UInt use_mask)
-{
- int idx = (high_idx << c->assoc_bits) | low_idx;
-
- c->use[idx].count ++;
- c->use[idx].mask |= use_mask;
-
- CLG_DEBUG(6," Hit [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",
- idx, c->loaded[idx].memline, c->loaded[idx].iaddr,
- use_mask, c->use[idx].mask, c->use[idx].count);
-}
-
-/* only used for I1, D1 */
-
-static __inline__
-CacheResult cacheuse_setref(cache_t2* c, UInt set_no, UWord tag)
-{
- int i, j, idx;
- UWord *set, tmp_tag;
- UInt use_mask;
-
- /* Shifting is a bit faster than multiplying */
- set = &(c->tags[set_no << c->assoc_bits]);
- use_mask =
- c->line_start_mask[a & c->line_size_mask] &
- c->line_end_mask[(a+size-1) & c->line_size_mask];
-
- /* This loop is unrolled for just the first case, which is the most */
- /* common. We can't unroll any further because it would screw up */
- /* if we have a direct-mapped (1-way) cache. */
- if (tag == (set[0] & c->tag_mask)) {
- cacheuse_update(c, set_no, set[0] & ~c->tag_mask, use_mask);
- return L1_Hit;
- }
-
- /* If the tag is one other than the MRU, move it into the MRU spot */
- /* and shuffle the rest down. */
- for (i = 1; i < c->assoc; i++) {
- if (tag == (set[i] & c->tag_mask)) {
- tmp_tag = set[i];
- for (j = i; j > 0; j--) {
- set[j] = set[j - 1];
- }
- set[0] = tmp_tag;
-
- cacheuse_update(c, set_no, tmp_tag & ~c->tag_mask, use_mask);
- return L1_Hit;
- }
- }
-
- /* A miss; install this tag as MRU, shuffle rest down. */
- tmp_tag = set[L.assoc - 1] & ~c->tag_mask;
- for (j = c->assoc - 1; j > 0; j--) {
- set[j] = set[j - 1];
- }
- set[0] = tag | tmp_tag;
-
- cacheuse_L2_miss(c, (set_no << c->assoc_bits) | tmp_tag,
- use_mask, a & ~c->line_size_mask);
-
- return Miss;
-}
-
-
-static CacheResult cacheuse_ref(cache_t2* c, Addr a, UChar size)
-{
- UInt set1 = ( a >> c->line_size_bits) & (c->sets_min_1);
- UInt set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
- UWord tag = a >> c->tag_shift;
-
- /* Access entirely within line. */
- if (set1 == set2)
- return cacheuse_setref(c, set1, tag);
-
- /* Access straddles two lines. */
- /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
- else if (((set1 + 1) & (c->sets-1)) == set2) {
- UWord tag2 = a >> c->tag_shift;
-
- /* the call updates cache structures as side effect */
- CacheResult res1 = cacheuse_isMiss(c, set1, tag);
- CacheResult res2 = cacheuse_isMiss(c, set2, tag2);
- return ((res1 == Miss) || (res2 == Miss)) ? Miss : Hit;
-
- } else {
- VG_(printf)("addr: %x size: %u sets: %d %d", a, size, set1, set2);
- VG_(tool_panic)("item straddles more than two cache sets");
- }
- return Hit;
-}
-#endif
-
-
/* for I1/D1 caches */
#define CACHEUSE(L) \
\
@@ -800,8 +700,7 @@
/* First case: word entirely within line. */ \
if (set1 == set2) { \
\
- /* Shifting is a bit faster than multiplying */ \
- set = &(L.tags[set1 << L.assoc_bits]); \
+ set = &(L.tags[set1 * L.assoc]); \
use_mask = L.line_start_mask[a & L.line_size_mask] & \
L.line_end_mask[(a+size-1) & L.line_size_mask]; \
\
@@ -809,7 +708,7 @@
/* common. We can't unroll any further because it would screw up */\
/* if we have a direct-mapped (1-way) cache. */\
if (tag == (set[0] & L.tag_mask)) { \
- idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask); \
+ idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
@@ -826,7 +725,7 @@
set[j] = set[j - 1]; \
} \
set[0] = tmp_tag; \
- idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \
+ idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
@@ -842,7 +741,7 @@
set[j] = set[j - 1]; \
} \
set[0] = tag | tmp_tag; \
- idx = (set1 << L.assoc_bits) | tmp_tag; \
+ idx = (set1 * L.assoc) + tmp_tag; \
return update_##L##_use(&L, idx, \
use_mask, a &~ L.line_size_mask); \
\
@@ -850,10 +749,10 @@
/* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
} else if (((set1 + 1) & (L.sets-1)) == set2) { \
Int miss1=0, miss2=0; /* 0: L1 hit, 1:L1 miss, 2:L2 miss */ \
- set = &(L.tags[set1 << L.assoc_bits]); \
+ set = &(L.tags[set1 * L.assoc]); \
use_mask = L.line_start_mask[a & L.line_size_mask]; \
if (tag == (set[0] & L.tag_mask)) { \
- idx = (set1 << L.assoc_bits) | (set[0] & ~L.tag_mask); \
+ idx = (set1 * L.assoc) + (set[0] & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
@@ -868,7 +767,7 @@
set[j] = set[j - 1]; \
} \
set[0] = tmp_tag; \
- idx = (set1 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \
+ idx = (set1 * L.assoc) + (tmp_tag & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
@@ -882,15 +781,15 @@
set[j] = set[j - 1]; \
} \
set[0] = tag | tmp_tag; \
- idx = (set1 << L.assoc_bits) | tmp_tag; \
+ idx = (set1 * L.assoc) + tmp_tag; \
miss1 = update_##L##_use(&L, idx, \
use_mask, a &~ L.line_size_mask); \
block2: \
- set = &(L.tags[set2 << L.assoc_bits]); \
+ set = &(L.tags[set2 * L.assoc]); \
use_mask = L.line_end_mask[(a+size-1) & L.line_size_mask]; \
tag2 = (a+size-1) & L.tag_mask; \
if (tag2 == (set[0] & L.tag_mask)) { \
- idx = (set2 << L.assoc_bits) | (set[0] & ~L.tag_mask); \
+ idx = (set2 * L.assoc) + (set[0] & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
@@ -905,7 +804,7 @@
set[j] = set[j - 1]; \
} \
set[0] = tmp_tag; \
- idx = (set2 << L.assoc_bits) | (tmp_tag & ~L.tag_mask); \
+ idx = (set2 * L.assoc) + (tmp_tag & ~L.tag_mask); \
L.use[idx].count ++; \
L.use[idx].mask |= use_mask; \
CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): %x => %08x, count %d\n",\
@@ -919,7 +818,7 @@
set[j] = set[j - 1]; \
} \
set[0] = tag2 | tmp_tag; \
- idx = (set2 << L.assoc_bits) | tmp_tag; \
+ idx = (set2 * L.assoc) + tmp_tag; \
miss2 = update_##L##_use(&L, idx, \
use_mask, (a+size-1) &~ L.line_size_mask); \
return (miss1==MemAccess || miss2==MemAccess) ? MemAccess:L2_Hit; \
@@ -984,7 +883,7 @@
CacheModelResult cacheuse_L2_access(Addr memline, line_loaded* l1_loaded)
{
UInt setNo = (memline >> L2.line_size_bits) & (L2.sets_min_1);
- UWord* set = &(L2.tags[setNo << L2.assoc_bits]);
+ UWord* set = &(L2.tags[setNo * L2.assoc]);
UWord tag = memline & L2.tag_mask;
int i, j, idx;
@@ -993,7 +892,7 @@
CLG_DEBUG(6,"L2.Acc(Memline %#lx): Set %d\n", memline, setNo);
if (tag == (set[0] & L2.tag_mask)) {
- idx = (setNo << L2.assoc_bits) | (set[0] & ~L2.tag_mask);
+ idx = (setNo * L2.assoc) + (set[0] & ~L2.tag_mask);
l1_loaded->dep_use = &(L2.use[idx]);
CLG_DEBUG(6," Hit0 [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
@@ -1008,7 +907,7 @@
set[j] = set[j - 1];
}
set[0] = tmp_tag;
- idx = (setNo << L2.assoc_bits) | (tmp_tag & ~L2.tag_mask);
+ idx = (setNo * L2.assoc) + (tmp_tag & ~L2.tag_mask);
l1_loaded->dep_use = &(L2.use[idx]);
CLG_DEBUG(6," Hit%d [idx %d] (line %#lx from %#lx): => %08x, count %d\n",
@@ -1024,7 +923,7 @@
set[j] = set[j - 1];
}
set[0] = tag | tmp_tag;
- idx = (setNo << L2.assoc_bits) | tmp_tag;
+ idx = (setNo * L2.assoc) + tmp_tag;
l1_loaded->dep_use = &(L2.use[idx]);
update_L2_use(idx, memline);
@@ -1380,23 +1279,16 @@
static
void check_cache(cache_t* cache, Char *name)
{
- /* First check they're all powers of two */
- if (-1 == VG_(log2)(cache->size)) {
+ /* Simulator requires line size and set count to be powers of two */
+ if (( cache->size % (cache->line_size * cache->assoc) != 0) ||
+ (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc))) {
VG_(message)(Vg_UserMsg,
- "error: %s size of %dB not a power of two; aborting.",
- name, cache->size);
- VG_(exit)(1);
+ "error: %s set count not a power of two; aborting.",
+ name);
}
- if (-1 == VG_(log2)(cache->assoc)) {
+ if (-1 == VG_(log2)(cache->line_size)) {
VG_(message)(Vg_UserMsg,
- "error: %s associativity of %d not a power of two; aborting.",
- name, cache->assoc);
- VG_(exit)(1);
- }
-
- if (-1 == VG_(log2)(cache->line_size)) {
- VG_(message)(Vg_UserMsg,
"error: %s line size of %dB not a power of two; aborting.",
name, cache->line_size);
VG_(exit)(1);
Modified: branches/VALGRIND_3_4_BRANCH/callgrind/tests/Makefile.am
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/tests/Makefile.am 2009-01-29 09:57:22 UTC (rev 9087)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/tests/Makefile.am 2009-01-29 10:14:53 UTC (rev 9088)
@@ -11,6 +11,10 @@
simwork1.vgtest simwork1.stdout.exp simwork1.stderr.exp \
simwork2.vgtest simwork2.stdout.exp simwork2.stderr.exp \
simwork3.vgtest simwork3.stdout.exp simwork3.stderr.exp \
+ notpower2.vgtest notpower2.stderr.exp \
+ notpower2-wb.vgtest notpower2-wb.stderr.exp \
+ notpower2-hwpref.vgtest notpower2-hwpref.stderr.exp \
+ notpower2-use.vgtest notpower2-use.stderr.exp \
threads.vgtest threads.stderr.exp
check_PROGRAMS = clreq simwork threads
Copied: branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-hwpref.stderr.exp (from rev 9080, trunk/callgrind/tests/notpower2-hwpref.stderr.exp)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-hwpref.stderr.exp (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-hwpref.stderr.exp 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,20 @@
+
+
+Events : Ir Dr Dw I1mr D1mr D1mw I2mr D2mr D2mw
+Collected :
+
+I refs:
+I1 misses:
+L2i misses:
+I1 miss rate:
+L2i miss rate:
+
+D refs:
+D1 misses:
+L2d misses:
+D1 miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
Copied: branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-hwpref.vgtest (from rev 9080, trunk/callgrind/tests/notpower2-hwpref.vgtest)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-hwpref.vgtest (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-hwpref.vgtest 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,3 @@
+prog: ../../tests/true
+vgopts: --I1=32768,8,64 --D1=24576,6,64 --L2=3145728,12,64 --simulate-hwpref=yes
+cleanup: rm callgrind.out.*
Copied: branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-use.stderr.exp (from rev 9080, trunk/callgrind/tests/notpower2-use.stderr.exp)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-use.stderr.exp (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-use.stderr.exp 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,20 @@
+
+
+Events : Ir Dr Dw I1mr D1mr D1mw I2mr D2mr D2mw AcCost1 SpLoss1 AcCost2 SpLoss2
+Collected :
+
+I refs:
+I1 misses:
+L2i misses:
+I1 miss rate:
+L2i miss rate:
+
+D refs:
+D1 misses:
+L2d misses:
+D1 miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
Copied: branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-use.vgtest (from rev 9080, trunk/callgrind/tests/notpower2-use.vgtest)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-use.vgtest (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-use.vgtest 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,3 @@
+prog: ../../tests/true
+vgopts: --I1=32768,8,64 --D1=24576,6,64 --L2=3145728,12,64 --cacheuse=yes
+cleanup: rm callgrind.out.*
Copied: branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-wb.stderr.exp (from rev 9080, trunk/callgrind/tests/notpower2-wb.stderr.exp)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-wb.stderr.exp (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-wb.stderr.exp 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,20 @@
+
+
+Events : Ir Dr Dw I1mr D1mr D1mw I2mr D2mr D2mw I2dmr D2dmr D2dmw
+Collected :
+
+I refs:
+I1 misses:
+L2i misses:
+I1 miss rate:
+L2i miss rate:
+
+D refs:
+D1 misses:
+L2d misses:
+D1 miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
Copied: branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-wb.vgtest (from rev 9080, trunk/callgrind/tests/notpower2-wb.vgtest)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-wb.vgtest (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2-wb.vgtest 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,3 @@
+prog: ../../tests/true
+vgopts: --I1=32768,8,64 --D1=24576,6,64 --L2=3145728,12,64 --simulate-wb=yes
+cleanup: rm callgrind.out.*
Copied: branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2.stderr.exp (from rev 9080, trunk/callgrind/tests/notpower2.stderr.exp)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2.stderr.exp (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2.stderr.exp 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,20 @@
+
+
+Events : Ir Dr Dw I1mr D1mr D1mw I2mr D2mr D2mw
+Collected :
+
+I refs:
+I1 misses:
+L2i misses:
+I1 miss rate:
+L2i miss rate:
+
+D refs:
+D1 misses:
+L2d misses:
+D1 miss rate:
+L2d miss rate:
+
+L2 refs:
+L2 misses:
+L2 miss rate:
Copied: branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2.vgtest (from rev 9080, trunk/callgrind/tests/notpower2.vgtest)
===================================================================
--- branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2.vgtest (rev 0)
+++ branches/VALGRIND_3_4_BRANCH/callgrind/tests/notpower2.vgtest 2009-01-29 10:14:53 UTC (rev 9088)
@@ -0,0 +1,3 @@
+prog: ../../tests/true
+vgopts: --I1=32768,8,64 --D1=24576,6,64 --L2=3145728,12,64
+cleanup: rm callgrind.out.*
|