From: John L. <mov...@us...> - 2001-08-19 20:09:20
|
Update of /cvsroot/oprofile/oprofile In directory usw-pr-cvs1:/tmp/cvs-serv3584 Modified Files: ChangeLog README TODO op_events.c op_init.c oprofile.c oprofile.h Log Message: the athlon patch. more work needed, but it works. Thanks Dave !! Index: ChangeLog =================================================================== RCS file: /cvsroot/oprofile/oprofile/ChangeLog,v retrieving revision 1.85 retrieving revision 1.86 diff -u -d -r1.85 -r1.86 --- ChangeLog 2001/08/19 18:50:54 1.85 +++ ChangeLog 2001/08/19 20:09:17 1.86 @@ -1,3 +1,19 @@ +2001-08-19 John Levon <mo...@co...> + + * op_init.c: + * oprofile.h: + * oprofile.c: small tidies of the Athlon support + +2001-08-19 Dave Jones <da...@su...> + + * op_events.c: + * op_init.c: + * oprofile.h: + * oprofile.c: + * dae/op_start: + * dae/oprofiled.h: + * dae/oprofiled.c: initial Athlon support + 2001-08-19 Philippe Elie <ph...@cl...> * oprofile.c: use symbolic constant for all apic setup, no generated Index: README =================================================================== RCS file: /cvsroot/oprofile/oprofile/README,v retrieving revision 1.11 retrieving revision 1.12 diff -u -d -r1.11 -r1.12 --- README 2001/08/11 01:38:29 1.11 +++ README 2001/08/19 20:09:17 1.12 @@ -11,5 +11,6 @@ wrote the tcl/tk oprofile interface and the source annotation facility, and more. -Dave Jones <da...@su...> and Bob Montgomery <bo...@fc...> provided -bug fixes and useful testing. +Dave Jones <da...@su...> provided bug fixes and the Athlon support. + +Bob Montgomery <bo...@fc...> provided bug fixes and useful testing. Index: TODO =================================================================== RCS file: /cvsroot/oprofile/oprofile/TODO,v retrieving revision 1.11 retrieving revision 1.12 diff -u -d -r1.11 -r1.12 --- TODO 2001/08/19 18:50:54 1.11 +++ TODO 2001/08/19 20:09:17 1.12 @@ -2,8 +2,7 @@ ----------- o Save counter state and restore on a finish -o Athlon: values in oprofile.c not op_init.c, bit 22 is per-counter, determine working event types, - fix ctr_overflowed(), fix set_perfctr to set high to -1 +o Athlon: bit 22 is per-counter, determine working event types, support 4 counters, documentation o SMP mappings - place in all buffers perhaps ? o check chroot() processes and the path hash stuff o There is no need for the daemon to run as root as long as every binary image Index: op_events.c =================================================================== RCS file: /cvsroot/oprofile/oprofile/op_events.c,v retrieving revision 1.19 retrieving revision 1.20 diff -u -d -r1.19 -r1.20 --- op_events.c 2001/07/21 22:53:38 1.19 +++ op_events.c 2001/08/19 20:09:17 1.20 @@ -26,6 +26,7 @@ #define OP_PII_PIII 3 #define OP_PII_ONLY 4 #define OP_PIII_ONLY 5 +#define OP_ATHLON_ONLY 6 #ifdef __KERNEL__ #include <linux/string.h> @@ -87,13 +88,39 @@ { 2, utm_bitmask, { 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 }, }, }; +/* Allowed, Event #, unit mask, name, minimum event value */ static struct op_event op_events[] = { /* Data Cache Unit (DCU) */ + {OP_ATHLON_ONLY,0x40,0,"DATA_CACHE_ACCESSES", 500,}, + {OP_ATHLON_ONLY,0x41,0,"DATA_CACHE_MISSES", 500,}, + {OP_ATHLON_ONLY,0x42,0,"DATA_CACHE_REFILLS_FROM_L2", 500,}, + {OP_ATHLON_ONLY,0x43,0,"DATA_CACHE_REFILLS_FROM_SYSTEM", 500,}, + {OP_ATHLON_ONLY,0x44,0,"DATA_CACHE_WRITEBACKS", 500,}, + {OP_ATHLON_ONLY,0x45,0,"L1_DTLB_MISSES_L2_DTLD_HITS", 500,}, + {OP_ATHLON_ONLY,0x46,0,"L1_AND_L2_DTLB_MISSES", 500,}, + {OP_ATHLON_ONLY,0x47,0,"MISALIGNED_DATA_REFS", 500,}, + {OP_ATHLON_ONLY,0x80,0,"ICACHE_FETCHES", 500,}, + {OP_ATHLON_ONLY,0x81,0,"ICACHE_MISSES", 500,}, + {OP_ATHLON_ONLY,0x84,0,"L1_ITLB_MISSES_L2_ITLB_HITS", 500,}, + {OP_ATHLON_ONLY,0x85,0,"L1_AND_L2_ITLB_MISSES", 500,}, + {OP_ATHLON_ONLY,0xc0,0,"RETIRED_INSNS", 500,}, + {OP_ATHLON_ONLY,0xc1,0,"RETIRED_OPS", 500,}, + {OP_ATHLON_ONLY,0xc2,0,"RETIRED_BRANCHES", 500,}, + {OP_ATHLON_ONLY,0xc3,0,"RETIRED_BRANCHES_MISPREDICTED", 500,}, + {OP_ATHLON_ONLY,0xc4,0,"RETIRED_TAKEN_BRANCHES", 500,}, + {OP_ATHLON_ONLY,0xc5,0,"RETIRED_TAKEN_BRANCHES_MISPREDICTED", 500,}, + {OP_ATHLON_ONLY,0xc6,0,"RETIRED_FAR_CONTROL_TRANSFERS", 500,}, + {OP_ATHLON_ONLY,0xc7,0,"RETIRED_RESYNC_BRANCHES", 500,}, + {OP_ATHLON_ONLY,0xcd,0,"INTERRUPTS_MASKED", 500,}, + {OP_ATHLON_ONLY,0xce,0,"INTERRUPTS_MASKED_PENDING", 500,}, + {OP_ATHLON_ONLY,0xcf,0,"HARDWARE_INTERRUPTS", 500,}, + {OP_ANY,0x43,0,"DATA_MEM_REFS", 500 }, {OP_ANY,0x45,0,"DCU_LINES_IN", 500 }, {OP_ANY,0x46,0,"DCU_M_LINES_IN", 500 }, {OP_ANY,0x47,0,"DCU_M_LINES_OUT", 500}, {OP_ANY,0x48,0,"DCU_MISS_OUTSTANDING", 500 }, + /* Intruction Fetch Unit (IFU) */ {OP_ANY,0x80,0,"IFU_IFETCH", 500 }, {OP_ANY,0x81,0,"IFU_IFETCH_MISS", 500 }, @@ -201,6 +228,8 @@ #define OP_CTR1_PII_EVENT 0x80 #define OP_CTR0_PIII_EVENT 0x100 #define OP_CTR1_PIII_EVENT 0x200 +#define OP_CTR0_ATHLON_EVENT 0x400 +#define OP_CTR1_ATHLON_EVENT 0x800 /** * op_check_unit_mask - sanity check unit mask value @@ -291,6 +320,8 @@ * * 2 Pentium III * + * 3 AMD Athlon + * * Use 0 values for @ctr0_type and @ctr1_type if the * counter is not used. * @@ -325,6 +356,11 @@ if (!proc) ret |= OP_CTR0_PII_EVENT; break; + + case OP_ATHLON_ONLY: + if (proc != 3) + ret |= OP_CTR0_ATHLON_EVENT; + break; default: break; } @@ -359,6 +395,12 @@ if (!proc) ret |= OP_CTR1_PII_EVENT; break; + + case OP_ATHLON_ONLY: + if (proc != 3) + ret |= OP_CTR1_ATHLON_EVENT; + break; + default: break; } @@ -397,6 +439,8 @@ * * 2 Pentium III * + * 3 AMD Athlon + * * Use "" strings for @ctr0_type and @ctr1_type if the * counter is not used. * @@ -702,6 +746,7 @@ case 3: printf("- Pentium II/III only\n"); break; case 4: printf("- Pentium II only\n"); break; case 5: printf("- Pentium III only\n"); break; + case 6: printf("- AMD Athlon only\n"); break; default: printf("\n"); break; } if (op_events[i].unit) { Index: op_init.c =================================================================== RCS file: /cvsroot/oprofile/oprofile/op_init.c,v retrieving revision 1.5 retrieving revision 1.6 diff -u -d -r1.5 -r1.6 --- op_init.c 2001/06/22 00:19:31 1.5 +++ op_init.c 2001/08/19 20:09:17 1.6 @@ -26,21 +26,26 @@ /* we want to include all P6 processors (i.e. > Pentium Classic, * < Pentium IV */ - if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL || - current_cpu_data.x86 != 6) { - printk(KERN_ERR "oprofile: not an Intel P6 processor. Sorry.\n"); - return 0; + if ((current_cpu_data.x86_vendor != X86_VENDOR_INTEL && + current_cpu_data.x86 != 6) || + (current_cpu_data.x86_vendor != X86_VENDOR_AMD && + current_cpu_data.x86 != 6)) { + printk(KERN_ERR "oprofile: not an Intel P6 or AMD Athlon processor. Sorry.\n"); + return CPU_NO_GOOD; } - /* 0 if PPro, 1 if PII, 2 if PIII */ - cpu_type = (current_cpu_data.x86_model > 5) ? 2 : - (current_cpu_data.x86_model > 2); - return 1; + /* 0 if PPro, 1 if PII, 2 if PIII, 3 if Athlon */ + if (current_cpu_data.x86_vendor == X86_VENDOR_AMD) + cpu_type = CPU_ATHLON; + else + cpu_type = (current_cpu_data.x86_model > 5) ? CPU_PIII : + (current_cpu_data.x86_model > 2); + return cpu_type; } int __init stub_init(void) { - if (!hw_ok()) + if (hw_ok() == CPU_NO_GOOD) return -EINVAL; return oprof_init(); Index: oprofile.c =================================================================== RCS file: /cvsroot/oprofile/oprofile/oprofile.c,v retrieving revision 1.72 retrieving revision 1.73 diff -u -d -r1.72 -r1.73 --- oprofile.c 2001/08/19 18:50:54 1.72 +++ oprofile.c 2001/08/19 20:09:17 1.73 @@ -34,9 +34,20 @@ static int op_ctr_val[OP_MAX_COUNTERS]; static int op_ctr_kernel[OP_MAX_COUNTERS]; static int op_ctr_user[OP_MAX_COUNTERS]; + pid_t pid_filter; pid_t pgrp_filter; +/* the MSRs we need */ +uint ctrlreg0 = MSR_IA32_EVNTSEL0; +uint ctrlreg1 = MSR_IA32_EVNTSEL1; +uint ctrlreg2 = MSR_K7_PERFCTL2; +uint ctrlreg3 = MSR_K7_PERFCTL3; +uint ctrreg0 = MSR_IA32_PERFCTR0; +uint ctrreg1 = MSR_IA32_PERFCTR1; +uint ctrreg2 = MSR_K7_PERFCTR2; +uint ctrreg3 = MSR_K7_PERFCTR3; + u32 prof_on __cacheline_aligned; static int op_major; @@ -50,6 +61,15 @@ extern spinlock_t map_lock; +extern unsigned int ctrlreg0; +extern unsigned int ctrlreg1; +extern unsigned int ctrlreg2; +extern unsigned int ctrlreg3; +extern unsigned int ctrreg0; +extern unsigned int ctrreg1; +extern unsigned int ctrreg2; +extern unsigned int ctrreg3; + /* ---------------- NMI handler ------------------ */ /* FIXME: this whole handler would probably be better in straight asm */ @@ -313,6 +333,7 @@ return 0; } + /* ugly hack */ /* PHE : the memory must be uncachable! this is perhaps more a * problem for Athlon than for PII. @@ -409,7 +430,7 @@ return 0; not_local_p6_apic: - printk(KERN_ERR "oprofile: no local P6 APIC\n"); + printk(KERN_ERR "oprofile: no local P6 APIC. Your laptop doesn't have one !\n"); /* IA32 V3, 7.4.2 */ rdmsr(MSR_IA32_APICBASE, msr_low, msr_high); wrmsr(MSR_IA32_APICBASE, msr_low & ~(1<<11), msr_high); @@ -438,12 +459,24 @@ { uint low, high; - rdmsr(MSR_IA32_EVNTSEL0, low, high); - wrmsr(MSR_IA32_EVNTSEL0, low & ~(1<<22), high); + // first, let's use the right MSRs + switch (cpu_type) { + case CPU_ATHLON: + ctrlreg0 = MSR_K7_PERFCTL0; + ctrlreg1 = MSR_K7_PERFCTL1; + ctrreg0 = MSR_K7_PERFCTR0; + ctrreg1 = MSR_K7_PERFCTR1; + break; + default:; + } + + rdmsr(ctrlreg0, low, high); + // FIXME: enable bit is per-counter on athlon + wrmsr(ctrlreg0, low & ~(1<<22), high); /* IA Vol. 3 Figure 15-3 */ - rdmsr(MSR_IA32_EVNTSEL0, low, high); + rdmsr(ctrlreg0, low, high); /* clear */ low &= (1<<21); @@ -452,24 +485,26 @@ pmc_fill_in(&low, op_ctr_kernel[0], op_ctr_user[0], op_ctr_val[0], op_ctr_um[0]); } - wrmsr(MSR_IA32_EVNTSEL0, low, 0); + wrmsr(ctrlreg0, low, 0); - rdmsr(MSR_IA32_EVNTSEL1, low, high); + rdmsr(ctrlreg1, low, high); /* clear */ low &= (3<<21); if (op_ctr_val[1]) { set_perfctr(op_ctr_count[1], 1); pmc_fill_in(&low, op_ctr_kernel[1], op_ctr_user[1], op_ctr_val[1], op_ctr_um[1]); - wrmsr(MSR_IA32_EVNTSEL1, low, high); + wrmsr(ctrlreg1, low, high); } /* disable ctr1 if the UP oopser might be on, but we can't do anything * interesting with the NMIs */ /* PHE FIXME Have always a meaning ? */ + /* this is pretty bogus really. especially as we don't re-enable it. + * Instead, save state set up, and restore with pmc_unsetup or similar */ #if !defined(CONFIG_X86_UP_APIC) || !defined(OP_EXPORTED_DO_NMI) - wrmsr(MSR_IA32_EVNTSEL1, low, high); + wrmsr(ctrlreg1, low, high); #endif } @@ -481,8 +516,9 @@ return; /* enable counters */ - rdmsr(MSR_IA32_EVNTSEL0, low, high); - wrmsr(MSR_IA32_EVNTSEL0, low | (1<<22), high); + rdmsr(ctrlreg0, low, high); + // FIXME: bit 22 per-counter on athlon + wrmsr(ctrlreg0, low | (1<<22), high); } static void pmc_stop(void *info) @@ -493,8 +529,9 @@ return; /* disable counters */ - rdmsr(MSR_IA32_EVNTSEL0, low, high); - wrmsr(MSR_IA32_EVNTSEL0, low & ~(1<<22), high); + rdmsr(ctrlreg0, low, high); + // FIXME: bit 22 per-counter on athlon + wrmsr(ctrlreg0, low & ~(1<<22), high); } inline static void pmc_select_start(uint cpu) @@ -847,6 +884,8 @@ if (ret & OP_CTR1_PII_EVENT) printk(KERN_ERR "oprofile: ctr1: event only available on PII\n"); if (ret & OP_CTR0_PIII_EVENT) printk(KERN_ERR "oprofile: ctr0: event only available on PIII\n"); if (ret & OP_CTR1_PIII_EVENT) printk(KERN_ERR "oprofile: ctr1: event only available on PIII\n"); + if (ret & OP_CTR0_ATHLON_EVENT) printk(KERN_ERR "oprofile: ctr1: event only available on Athlon\n"); + if (ret & OP_CTR1_ATHLON_EVENT) printk(KERN_ERR "oprofile: ctr1: event only available on Athlon\n"); if (ret) return 0; @@ -874,7 +913,7 @@ static int oprof_start(void) { int err = 0; - + down(&sysctlsem); if ((err = oprof_init_data())) @@ -885,7 +924,7 @@ err = -EINVAL; goto out; } - + if ((smp_call_function(pmc_setup, NULL, 0, 1))) { oprof_free_mem(smp_num_cpus); err = -EINVAL; @@ -893,7 +932,7 @@ } pmc_setup(NULL); - + install_nmi(); if (!kernel_only) @@ -905,7 +944,7 @@ pmc_start(NULL); prof_on = 1; - + out: up(&sysctlsem); return err; Index: oprofile.h =================================================================== RCS file: /cvsroot/oprofile/oprofile/oprofile.h,v retrieving revision 1.45 retrieving revision 1.46 diff -u -d -r1.45 -r1.46 --- oprofile.h 2001/08/11 12:37:56 1.45 +++ oprofile.h 2001/08/19 20:09:17 1.46 @@ -53,6 +53,12 @@ #define OP_CTR_0 0x1 #define OP_CTR_1 0x2 +#define CPU_NO_GOOD -1 +#define CPU_PPRO 0 +#define CPU_PII 1 +#define CPU_PIII 2 +#define CPU_ATHLON 3 + /* MSRs */ #ifndef MSR_IA32_PERFCTR0 #define MSR_IA32_PERFCTR0 0xc1 @@ -69,6 +75,30 @@ #ifndef MSR_IA32_APICBASE #define MSR_IA32_APICBASE 0x1B #endif +#ifndef MSR_K7_PERFCTL0 +#define MSR_K7_PERFCTL0 0xc0010000 +#endif +#ifndef MSR_K7_PERFCTL1 +#define MSR_K7_PERFCTL1 0xc0010001 +#endif +#ifndef MSR_K7_PERFCTL2 +#define MSR_K7_PERFCTL2 0xc0010002 +#endif +#ifndef MSR_K7_PERFCTL3 +#define MSR_K7_PERFCTL3 0xc0010003 +#endif +#ifndef MSR_K7_PERFCTR0 +#define MSR_K7_PERFCTR0 0xc0010004 +#endif +#ifndef MSR_K7_PERFCTR1 +#define MSR_K7_PERFCTR1 0xc0010005 +#endif +#ifndef MSR_K7_PERFCTR2 +#define MSR_K7_PERFCTR2 0xc0010006 +#endif +#ifndef MSR_K7_PERFCTR3 +#define MSR_K7_PERFCTR3 0xc0010007 +#endif #ifndef APIC_SPIV_APIC_ENABLED #define APIC_SPIV_APIC_ENABLED (1<<8) @@ -114,8 +144,8 @@ ^ (ctr<<8)) & (data->hash_size - 1) /* relying on MSR numbers being neighbours */ -#define get_perfctr(l,h,c) do { rdmsr(MSR_IA32_PERFCTR0 + c, (l), (h)); } while (0) -#define set_perfctr(l,c) do { wrmsr(MSR_IA32_PERFCTR0 + c, -(u32)(l), 0); } while (0) +#define get_perfctr(l,h,c) do { rdmsr(ctrreg0 + c, (l), (h)); } while (0) +#define set_perfctr(l,c) do { wrmsr(ctrreg0 + c, -(u32)(l), -1); } while (0) #define ctr_overflowed(n) (!((n) & (1U<<31))) #define OP_EVENTS_OK 0x0 @@ -129,6 +159,8 @@ #define OP_CTR1_PII_EVENT 0x80 #define OP_CTR0_PIII_EVENT 0x100 #define OP_CTR1_PIII_EVENT 0x200 +#define OP_CTR0_ATHLON_EVENT 0x400 +#define OP_CTR1_ATHLON_EVENT 0x800 #define op_check_range(val,l,h,str) do { \ if ((val) < (l) || (val) > (h)) { \ @@ -182,6 +214,16 @@ #else #include <linux/completion.h> #endif + +// FIXME: these names are too easy to mis-type ! +extern uint ctrlreg0; +extern uint ctrlreg1; +extern uint ctrlreg2; +extern uint ctrlreg3; +extern uint ctrreg0; +extern uint ctrreg1; +extern uint ctrreg2; +extern uint ctrreg3; int oprof_init(void); void oprof_exit(void); |