|
From: <sv...@va...> - 2011-06-10 15:09:20
|
Author: tom
Date: 2011-06-10 16:04:22 +0100 (Fri, 10 Jun 2011)
New Revision: 11810
Log:
Teach cachegrind/callgrind how to parse the cache description
in the CPUID data on recent Intel processors.
Modified:
trunk/cachegrind/cg-x86-amd64.c
trunk/coregrind/m_cpuid.S
trunk/coregrind/m_machine.c
trunk/include/pub_tool_cpuid.h
Modified: trunk/cachegrind/cg-x86-amd64.c
===================================================================
--- trunk/cachegrind/cg-x86-amd64.c 2011-06-09 12:26:42 UTC (rev 11809)
+++ trunk/cachegrind/cg-x86-amd64.c 2011-06-10 15:04:22 UTC (rev 11810)
@@ -66,7 +66,7 @@
Int family;
Int model;
UChar info[16];
- Int i, trials;
+ Int i, j, trials;
Bool L2_found = False;
/* If we see L3 cache info, copy it into L3c. Then, at the end,
copy it into *LLc. Hence if a L3 cache is specified, *LLc will
@@ -83,13 +83,13 @@
}
/* family/model needed to distinguish code reuse (currently 0x49) */
- VG_(cpuid)(1, &cpuid1_eax, &cpuid1_ignore,
+ VG_(cpuid)(1, 0, &cpuid1_eax, &cpuid1_ignore,
&cpuid1_ignore, &cpuid1_ignore);
family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
model = (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
- VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4],
- (Int*)&info[8], (Int*)&info[12]);
+ VG_(cpuid)(2, 0, (Int*)&info[0], (Int*)&info[4],
+ (Int*)&info[8], (Int*)&info[12]);
trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
info[0] = 0x0; /* reset AL */
@@ -237,6 +237,61 @@
case 0xf0: case 0xf1:
break;
+ case 0xff:
+ j = 0;
+ VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4],
+ (Int*)&info[8], (Int*)&info[12]);
+
+ while ((info[0] & 0x1f) != 0) {
+ UInt assoc = ((*(UInt *)&info[4] >> 22) & 0x3ff) + 1;
+ UInt parts = ((*(UInt *)&info[4] >> 12) & 0x3ff) + 1;
+ UInt line_size = (*(UInt *)&info[4] & 0x7ff) + 1;
+ UInt sets = *(UInt *)&info[8] + 1;
+ cache_t c;
+
+ c.size = assoc * parts * line_size * sets / 1024;
+ c.assoc = assoc;
+ c.line_size = line_size;
+
+ switch ((info[0] & 0xe0) >> 5)
+ {
+ case 1:
+ switch (info[0] & 0x1f)
+ {
+ case 1: *D1c = c; break;
+ case 2: *I1c = c; break;
+ case 3: VG_(dmsg)("warning: L1 unified cache ignored\n"); break;
+ default: VG_(dmsg)("warning: L1 cache of unknown type ignored\n"); break;
+ }
+ break;
+ case 2:
+ switch (info[0] & 0x1f)
+ {
+ case 1: VG_(dmsg)("warning: L2 data cache ignored\n"); break;
+ case 2: VG_(dmsg)("warning: L2 instruction cache ignored\n"); break;
+ case 3: *LLc = c; L2_found = True; break;
+ default: VG_(dmsg)("warning: L2 cache of unknown type ignored\n"); break;
+ }
+ break;
+ case 3:
+ switch (info[0] & 0x1f)
+ {
+ case 1: VG_(dmsg)("warning: L3 data cache ignored\n"); break;
+ case 2: VG_(dmsg)("warning: L3 instruction cache ignored\n"); break;
+ case 3: L3c = c; L3_found = True; break;
+ default: VG_(dmsg)("warning: L3 cache of unknown type ignored\n"); break;
+ }
+ break;
+ default:
+ VG_(dmsg)("warning: L%u cache ignored\n", (info[0] & 0xe0) >> 5);
+ break;
+ }
+
+ VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4],
+ (Int*)&info[8], (Int*)&info[12]);
+ }
+ break;
+
default:
VG_(dmsg)("warning: Unknown Intel cache config value (0x%x), ignoring\n",
info[i]);
@@ -311,7 +366,7 @@
UInt dummy, model;
UInt I1i, D1i, L2i, L3i;
- VG_(cpuid)(0x80000000, &ext_level, &dummy, &dummy, &dummy);
+ VG_(cpuid)(0x80000000, 0, &ext_level, &dummy, &dummy, &dummy);
if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
VG_(dmsg)("warning: ext_level < 0x80000006 for AMD processor (0x%x)\n",
@@ -319,10 +374,10 @@
return -1;
}
- VG_(cpuid)(0x80000005, &dummy, &dummy, &D1i, &I1i);
- VG_(cpuid)(0x80000006, &dummy, &dummy, &L2i, &L3i);
+ VG_(cpuid)(0x80000005, 0, &dummy, &dummy, &D1i, &I1i);
+ VG_(cpuid)(0x80000006, 0, &dummy, &dummy, &L2i, &L3i);
- VG_(cpuid)(0x1, &model, &dummy, &dummy, &dummy);
+ VG_(cpuid)(0x1, 0, &model, &dummy, &dummy, &dummy);
/* Check for Duron bug */
if (model == 0x630) {
@@ -367,7 +422,7 @@
return -1;
}
- VG_(cpuid)(0, &level, (int*)&vendor_id[0],
+ VG_(cpuid)(0, 0, &level, (int*)&vendor_id[0],
(int*)&vendor_id[8], (int*)&vendor_id[4]);
vendor_id[12] = '\0';
Modified: trunk/coregrind/m_cpuid.S
===================================================================
--- trunk/coregrind/m_cpuid.S 2011-06-09 12:26:42 UTC (rev 11809)
+++ trunk/coregrind/m_cpuid.S 2011-06-10 15:04:22 UTC (rev 11810)
@@ -66,7 +66,7 @@
#endif
/*
- void VG_(cpuid)(UInt eax,
+ void VG_(cpuid)(UInt eax, UInt ecx,
UInt* eax_ret, UInt* ebx_ret, UInt* ecx_ret, UInt* edx_ret)
*/
#if defined(VGA_x86)
@@ -81,23 +81,24 @@
pushl %edx
pushl %esi
movl 8(%ebp), %eax
+ movl 12(%ebp), %ecx
cpuid
- movl 12(%ebp), %esi
+ movl 16(%ebp), %esi
testl %esi, %esi
jz 1f
movl %eax, (%esi)
1:
- movl 16(%ebp), %esi
+ movl 20(%ebp), %esi
testl %esi, %esi
jz 2f
movl %ebx, (%esi)
2:
- movl 20(%ebp), %esi
+ movl 24(%ebp), %esi
testl %esi, %esi
jz 3f
movl %ecx, (%esi)
3:
- movl 24(%ebp), %esi
+ movl 28(%ebp), %esi
testl %esi, %esi
jz 4f
movl %edx, (%esi)
@@ -118,13 +119,14 @@
movq %rsp, %rbp
pushq %rbx
movl %edi, %eax
- movq %rdx, %rdi
- movq %rcx, %r9
+ movq %rcx, %rdi
+ movl %esi, %ecx
+ movq %rdx, %rsi
/*
eax_ret now in %rsi
ebx_ret now in %rdi
- ecx_ret now in %r9
- edx_ret now in %r8
+ ecx_ret now in %r8
+ edx_ret now in %r9
*/
cpuid
testq %rsi, %rsi
@@ -135,13 +137,13 @@
jz 2f
movl %ebx, (%rdi)
2:
- testq %r9, %r9
+ testq %r8, %r8
jz 3f
- movl %ecx, (%r9)
+ movl %ecx, (%r8)
3:
- testq %r8, %r8
+ testq %r9, %r9
jz 4f
- movl %edx, (%r8)
+ movl %edx, (%r9)
4:
popq %rbx
movq %rbp, %rsp
Modified: trunk/coregrind/m_machine.c
===================================================================
--- trunk/coregrind/m_machine.c 2011-06-09 12:26:42 UTC (rev 11809)
+++ trunk/coregrind/m_machine.c 2011-06-10 15:04:22 UTC (rev 11810)
@@ -621,7 +621,7 @@
/* we can't do cpuid at all. Give up. */
return False;
- VG_(cpuid)(0, &eax, &ebx, &ecx, &edx);
+ VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
if (eax < 1)
/* we can't ask for cpuid(x) for x > 0. Give up. */
return False;
@@ -633,11 +633,11 @@
VG_(memcpy)(&vstr[8], &ecx, 4);
vstr[12] = 0;
- VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
+ VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
max_extended = eax;
/* get capabilities bits into edx */
- VG_(cpuid)(1, &eax, &ebx, &ecx, &edx);
+ VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
@@ -653,7 +653,7 @@
have_lzcnt = False;
if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
&& max_extended >= 0x80000001) {
- VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
+ VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
}
@@ -691,7 +691,7 @@
/* we can't do cpuid at all. Give up. */
return False;
- VG_(cpuid)(0, &eax, &ebx, &ecx, &edx);
+ VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
if (eax < 1)
/* we can't ask for cpuid(x) for x > 0. Give up. */
return False;
@@ -703,11 +703,11 @@
VG_(memcpy)(&vstr[8], &ecx, 4);
vstr[12] = 0;
- VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
+ VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
max_extended = eax;
/* get capabilities bits into edx */
- VG_(cpuid)(1, &eax, &ebx, &ecx, &edx);
+ VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
// we assume that SSE1 and SSE2 are available by default
have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
@@ -729,7 +729,7 @@
have_lzcnt = False;
if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
&& max_extended >= 0x80000001) {
- VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
+ VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
}
Modified: trunk/include/pub_tool_cpuid.h
===================================================================
--- trunk/include/pub_tool_cpuid.h 2011-06-09 12:26:42 UTC (rev 11809)
+++ trunk/include/pub_tool_cpuid.h 2011-06-10 15:04:22 UTC (rev 11810)
@@ -34,7 +34,7 @@
#if defined(VGA_x86) || defined(VGA_amd64)
extern Bool VG_(has_cpuid) ( void );
-extern void VG_(cpuid) ( UInt eax,
+extern void VG_(cpuid) ( UInt eax, UInt ecx,
UInt* eax_ret, UInt* ebx_ret,
UInt* ecx_ret, UInt* edx_ret );
#endif
|