|
From: <sv...@va...> - 2006-11-23 13:04:36
|
Author: weidendo
Date: 2006-11-23 13:04:30 +0000 (Thu, 23 Nov 2006)
New Revision: 6369
Log:
Cachegrind/Callgrind: Fix cache parameter detection
On Intel processors, CPUIDs cache parameter code 0x49 is
reused both for L2 and L3 parameters.
Thanks to Ulrich Drepper.
Modified:
trunk/cachegrind/cg-amd64.c
trunk/cachegrind/cg-x86.c
Modified: trunk/cachegrind/cg-amd64.c
===================================================================
--- trunk/cachegrind/cg-amd64.c 2006-11-22 21:07:10 UTC (rev 6368)
+++ trunk/cachegrind/cg-amd64.c 2006-11-23 13:04:30 UTC (rev 6369)
@@ -52,11 +52,16 @@
 
/* Intel method is truly wretched. We have to do an insane indexing into an
* array of pre-defined configurations for various parts of the memory
- * hierarchy. 
+ * hierarchy.
+ * According to Intel Processor Identification, App Note 485.
*/
static
Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
{
+ Int cpuid1_eax;
+ Int cpuid1_ignore;
+ Int family;
+ Int model;
UChar info[16];
Int i, trials;
Bool L2_found = False;
@@ -68,6 +73,12 @@
return -1;
}
 
+ /* family/model needed to distinguish code reuse (currently 0x49) */
+ VG_(cpuid)(1, &cpuid1_eax, &cpuid1_ignore,
+ &cpuid1_ignore, &cpuid1_ignore);
+ family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
+ model = (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
+
VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4], 
(Int*)&info[8], (Int*)&info[12]);
trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
@@ -110,7 +121,7 @@
 
case 0x22: case 0x23: case 0x25: case 0x29: case 0x46: case 0x47:
VG_(message)(Vg_DebugMsg, 
- "warning: L3 cache detected but ignored\n");
+ "warning: L3 cache detected but ignored");
break;
 
/* These are sectored, whatever that means */
@@ -129,7 +140,14 @@
case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
- case 0x49: *L2c = (cache_t) { 4096,16, 64 }; L2_found = True; break;
+ case 0x49:
+ if ((family == 15) && (model == 6))
+ /* On Xeon MP (family F, model 6), this is for L3 */
+ VG_(message)(Vg_DebugMsg, 
+ "warning: L3 cache detected but ignored\n");
+ else
+ *L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
+ break;
 
/* These are sectored, whatever that means */
case 0x60: *D1c = (cache_t) { 16, 8, 64 }; break; /* sectored */
Modified: trunk/cachegrind/cg-x86.c
===================================================================
--- trunk/cachegrind/cg-x86.c 2006-11-22 21:07:10 UTC (rev 6368)
+++ trunk/cachegrind/cg-x86.c 2006-11-23 13:04:30 UTC (rev 6369)
@@ -52,11 +52,16 @@
 
/* Intel method is truly wretched. We have to do an insane indexing into an
* array of pre-defined configurations for various parts of the memory
- * hierarchy. 
+ * hierarchy.
+ * According to Intel Processor Identification, App Note 485.
*/
static
Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c)
{
+ Int cpuid1_eax;
+ Int cpuid1_ignore;
+ Int family;
+ Int model;
UChar info[16];
Int i, trials;
Bool L2_found = False;
@@ -68,6 +73,12 @@
return -1;
}
 
+ /* family/model needed to distinguish code reuse (currently 0x49) */
+ VG_(cpuid)(1, &cpuid1_eax, &cpuid1_ignore,
+ &cpuid1_ignore, &cpuid1_ignore);
+ family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
+ model = (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
+
VG_(cpuid)(2, (Int*)&info[0], (Int*)&info[4], 
(Int*)&info[8], (Int*)&info[12]);
trials = info[0] - 1; /* AL register - bits 0..7 of %eax */
@@ -109,7 +120,8 @@
VG_(tool_panic)("IA-64 cache detected?!");
 
case 0x22: case 0x23: case 0x25: case 0x29: case 0x46: case 0x47:
- VG_(message)(Vg_DebugMsg, "warning: L3 cache detected but ignored");
+ VG_(message)(Vg_DebugMsg,
+ "warning: L3 cache detected but ignored");
break;
 
/* These are sectored, whatever that means */
@@ -128,7 +140,14 @@
case 0x43: *L2c = (cache_t) { 512, 4, 32 }; L2_found = True; break;
case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; L2_found = True; break;
case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; L2_found = True; break;
- case 0x49: *L2c = (cache_t) { 4096,16, 64 }; L2_found = True; break;
+ case 0x49:
+ if ((family == 15) && (model == 6))
+ /* On Xeon MP (family F, model 6), this is for L3 */
+ VG_(message)(Vg_DebugMsg, 
+ "warning: L3 cache detected but ignored\n");
+ else
+ *L2c = (cache_t) { 4096, 16, 64 }; L2_found = True;
+ break;
 
/* These are sectored, whatever that means */
case 0x60: *D1c = (cache_t) { 16, 8, 64 }; break; /* sectored */
|
|
From: Josef W. <Jos...@gm...> - 2006-11-23 16:11:05
|
On Thursday 23 November 2006 14:04, sv...@va... wrote:
> Author: weidendo
> Date: 2006-11-23 13:04:30 +0000 (Thu, 23 Nov 2006)
> New Revision: 6369
>
> Log:
> Cachegrind/Callgrind: Fix cache parameter detection

I think we really should get rid of the total duplication
of code in cg-amd64.c and cg-x86.c. They are currently
more or less exactly the same.

Whatever new additions are needed, they will be needed for
both files, as any 64bit x86 processor can also run in
32bit mode.

How to do it best?
I suggest moving the code into a new cg-x86-amd64.c and
include it both in cg-amd64.c and cg-x86.c.

Josef |
|
From: Julian S. <js...@ac...> - 2006-11-23 16:39:26
|
In principle I agree with Josef, but I have a different suggestion
for implementation. The core module m_machine exists precisely to
give a convenient place to record/discover various aspects of the
CPU we are running on. And so I think the cache detection stuff
should all be moved in there.

J

On Thursday 23 November 2006 16:10, Josef Weidendorfer wrote:
> On Thursday 23 November 2006 14:04, sv...@va... wrote:
> > Author: weidendo
> > Date: 2006-11-23 13:04:30 +0000 (Thu, 23 Nov 2006)
> > New Revision: 6369
> >
> > Log:
> > Cachegrind/Callgrind: Fix cache parameter detection
>
> I think we really should get rid of the total duplication
> of code in cg-amd64.c and cg-x86.c. They are currently
> more or less exactly the same.
>
> Whatever new additions are needed, they will be needed for
> both files, as any 64bit x86 processor can also run in
> 32bit mode.
>
> How to do it best?
> I suggest moving the code into a new cg-x86-amd64.c and
> include it both in cg-amd64.c and cg-x86.c.
>
> Josef
>
> -------------------------------------------------------------------------
> Take Surveys. Earn Cash. Influence the Future of IT
> Join SourceForge.net's Techsay panel and you'll get the chance to share
> your opinions on IT & business topics through brief surveys - and earn cash
> http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
> _______________________________________________
> Valgrind-developers mailing list
> Val...@li...
> https://lists.sourceforge.net/lists/listinfo/valgrind-developers |
|
From: Nicholas N. <nj...@cs...> - 2006-11-23 22:59:20
|
On Thu, 23 Nov 2006, Julian Seward wrote:
> In principle I agree with Josef, but I have a different suggestion
> for implementation. The core module m_machine exists precisely to
> give a convenient place to record/discover various aspects of the
> CPU we are running on. And so I think the cache detection stuff
> should all be moved in there.

Maybe, but Josef's change is simpler and more likely to occur...

N |