From: <gr...@re...> - 2002-09-20 18:50:20
|
hi, attached is another go at the model-specific x86 backend cleanup patch, in further preparation for landing the P4 patch. it applies to the cvs head, builds and runs on my P4 with RTC mode; but that's not saying much. If someone with a ppro / athlon could confirm the changes did not disrupt those backends I'd appreciate it. there is some chance of that, with the moving-around of things. differences from last round of this patch: - some more constification - fixed some spacing / formatting - removed model-fetch call from within NMI, assume it's set - move MSR addresses to contiguous arrays to remain more cache friendly during frequent NMIs any further comments / suggestions? -graydon --- ChangeLog Fri Sep 20 14:37:12 2002 +++ ChangeLog Fri Sep 20 14:36:46 2002 @@ -1,3 +1,11 @@ +2002-09-20 Graydon Hoare <gr...@re...> + + * module/x86/op_x86_model.h: New interface for MSRs. + * module/x86/op_nmi.c: Use interface. + * module/x86/op_model_ppro.c: New, code from op_nmi.c. + * module/x86/op_model_athlon.c: New, code from op_nmi.c. + * module/x86/Makefile.in: Reflect new files. + 2002-09-20 John Levon <le...@mo...> * libutil++/op_bfd.cpp: fix a bug where we broke --- module/x86/Makefile.in Wed Sep 4 10:19:37 2002 +++ module/x86/Makefile.in Tue Sep 17 15:37:33 2002 @@ -18,7 +18,8 @@ O_TARGET := arch.o obj-y := cpu_type.o op_apic.o op_fixmap.o op_rtc.o op_nmi.o \ - op_syscalls.o oprofile_nmi.o + op_syscalls.o op_model_ppro.o op_model_athlon.o \ + oprofile_nmi.o obj-m := $(O_TARGET) O_OBJS := $(obj-y) M_OBJS := $(O_TARGET) --- module/x86/op_model_athlon.c Wed Dec 31 19:00:00 1969 +++ module/x86/op_model_athlon.c Fri Sep 20 10:49:17 2002 @@ -0,0 +1,133 @@ +/** + * @file op_model_athlon.h + * athlon / K7 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + */ + +#include "op_x86_model.h" +#include "op_msr.h" + +#define ATHLON_NUM_COUNTERS 4 +#define ATHLON_NUM_CONTROLS 4 + +#define ATHLON_CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0); +#define ATHLON_CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(u32)(l), -1);} while (0); +#define ATHLON_CTR_SET_ACTIVE(n) (n |= (1<<22)) +#define ATHLON_CTR_SET_INACTIVE(n) (n &= ~(1<<22)) +#define ATHLON_CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define ATHLON_CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls.addrs[(c)], (l), (h));} while (0); +#define ATHLON_CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls.addrs[(c)], (l), (h));} while (0); +#define ATHLON_CTRL_CLEAR(x) (x &= (1<<21)) +#define ATHLON_CTRL_SET_ENABLE(val) (val |= 1<<20) +#define ATHLON_CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define ATHLON_CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define ATHLON_CTRL_SET_UM(val, m) (val |= (m << 8)) +#define ATHLON_CTRL_SET_EVENT(val, e) (val |= e) + +static void athlon_fill_in_addresses(struct op_msrs * const msrs) +{ + msrs->counters.addrs[0] = MSR_K7_PERFCTR0; + msrs->counters.addrs[1] = MSR_K7_PERFCTR1; + msrs->counters.addrs[2] = MSR_K7_PERFCTR2; + msrs->counters.addrs[3] = MSR_K7_PERFCTR3; + + msrs->controls.addrs[0] = MSR_K7_PERFCTL0; + msrs->controls.addrs[1] = MSR_K7_PERFCTL1; + msrs->controls.addrs[2] = MSR_K7_PERFCTL2; + msrs->controls.addrs[3] = MSR_K7_PERFCTL3; +} + +static void athlon_setup_ctrs(struct op_msrs const * const msrs) +{ + uint low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < ATHLON_NUM_CONTROLS; ++i) { + ATHLON_CTRL_READ (low, high, msrs, i); + ATHLON_CTRL_CLEAR (low); + ATHLON_CTRL_WRITE (low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < ATHLON_NUM_COUNTERS; ++i) { + ATHLON_CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < ATHLON_NUM_COUNTERS; ++i) { + if (sysctl.ctr[i].event) { + + ATHLON_CTR_WRITE(sysctl.ctr[i].count, msrs, i); + + ATHLON_CTRL_READ (low, high, msrs, i); + ATHLON_CTRL_CLEAR (low); + ATHLON_CTRL_SET_ENABLE (low); + ATHLON_CTRL_SET_USR (low, sysctl.ctr[i].user); + ATHLON_CTRL_SET_KERN (low, sysctl.ctr[i].kernel); + ATHLON_CTRL_SET_UM (low, sysctl.ctr[i].unit_mask); + ATHLON_CTRL_SET_EVENT (low, sysctl.ctr[i].event); + ATHLON_CTRL_WRITE (low, high, msrs, i); + } + } +} + +static void athlon_check_ctrs(uint const cpu, + struct op_msrs const * const msrs, + struct pt_regs * const regs) +{ + ulong low, high; + int i; + for (i = 0 ; i < ATHLON_NUM_COUNTERS; ++i) { + ATHLON_CTR_READ(low, high, msrs, i); + if (ATHLON_CTR_OVERFLOWED(low)) { + op_do_profile(cpu, regs, i); + ATHLON_CTR_WRITE(oprof_data[cpu].ctr_count[i], msrs, i); + } + } + +} + +static void athlon_start(struct op_msrs const * const msrs) +{ + uint low, high; + int i; + for (i = 0 ; i < ATHLON_NUM_COUNTERS ; ++i) { + if (sysctl.ctr[i].count) { + ATHLON_CTR_READ(low, high, msrs, i); + ATHLON_CTR_SET_ACTIVE(low); + ATHLON_CTR_WRITE(low, msrs, i); + } + } +} + +static void athlon_stop(struct op_msrs const * const msrs) +{ + uint low,high; + int i; + for (i = 0 ; i < ATHLON_NUM_COUNTERS ; ++i) { + if (sysctl.ctr[i].count) { + ATHLON_CTR_READ(low, high, msrs, i); + ATHLON_CTR_SET_INACTIVE(low); + ATHLON_CTR_WRITE(low, msrs, i); + } + } +} + + +struct op_x86_model_spec const op_athlon_spec = { + num_counters: ATHLON_NUM_COUNTERS, + num_controls: ATHLON_NUM_CONTROLS, + fill_in_addresses: &athlon_fill_in_addresses, + setup_ctrs: &athlon_setup_ctrs, + check_ctrs: &athlon_check_ctrs, + start: &athlon_start, + stop: &athlon_stop +}; --- module/x86/op_model_ppro.c Wed Dec 31 19:00:00 1969 +++ module/x86/op_model_ppro.c Fri Sep 20 10:49:28 2002 @@ -0,0 +1,118 @@ +/** + * @file op_model_ppro.h + * pentium pro / P6 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + */ + +#include "op_x86_model.h" + +#define PPRO_NUM_COUNTERS 2 +#define PPRO_NUM_CONTROLS 2 + +#define PPRO_CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters.addrs[(c)], (l), (h));} while (0); +#define PPRO_CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters.addrs[(c)], -(u32)(l), -1);} while (0); +#define PPRO_CTR_SET_ACTIVE(n) (n |= (1<<22)) +#define PPRO_CTR_SET_INACTIVE(n) (n &= ~(1<<22)) +#define PPRO_CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define PPRO_CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0); +#define PPRO_CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls.addrs[(c)]), (l), (h));} while (0); +#define PPRO_CTRL_CLEAR(x) (x &= (1<<21)) +#define PPRO_CTRL_SET_ENABLE(val) (val |= 1<<20) +#define PPRO_CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define PPRO_CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define PPRO_CTRL_SET_UM(val, m) (val |= (m << 8)) +#define PPRO_CTRL_SET_EVENT(val, e) (val |= e) + +static void ppro_fill_in_addresses(struct op_msrs * const msrs) +{ + msrs->counters.addrs[0] = MSR_P6_PERFCTR0; + msrs->counters.addrs[1] = MSR_P6_PERFCTR1; + + msrs->controls.addrs[0] = MSR_P6_EVNTSEL0; + msrs->controls.addrs[1] = MSR_P6_EVNTSEL1; +} + +static void ppro_setup_ctrs(struct op_msrs const * const msrs) +{ + uint low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < PPRO_NUM_CONTROLS; ++i) { + PPRO_CTRL_READ (low, high, msrs, i); + PPRO_CTRL_CLEAR (low); + PPRO_CTRL_WRITE (low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < PPRO_NUM_COUNTERS; ++i) { + PPRO_CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < PPRO_NUM_COUNTERS; ++i) { + if (sysctl.ctr[i].event) { + + PPRO_CTR_WRITE(sysctl.ctr[i].count, msrs, i); + + PPRO_CTRL_READ (low, high, msrs, i); + PPRO_CTRL_CLEAR (low); + PPRO_CTRL_SET_ENABLE (low); + PPRO_CTRL_SET_USR (low, sysctl.ctr[i].user); + PPRO_CTRL_SET_KERN (low, sysctl.ctr[i].kernel); + PPRO_CTRL_SET_UM (low, sysctl.ctr[i].unit_mask); + PPRO_CTRL_SET_EVENT (low, sysctl.ctr[i].event); + PPRO_CTRL_WRITE (low, high, msrs, i); + } + } +} + +static void ppro_check_ctrs(uint const cpu, + struct op_msrs const * const msrs, + struct pt_regs * const regs) +{ + ulong low, high; + int i; + for (i = 0 ; i < PPRO_NUM_COUNTERS; ++i) { + PPRO_CTR_READ(low, high, msrs, i); + if (PPRO_CTR_OVERFLOWED(low)) { + op_do_profile(cpu, regs, i); + PPRO_CTR_WRITE(oprof_data[cpu].ctr_count[i], msrs, i); + } + } + +} + +static void ppro_start(struct op_msrs const * const msrs) +{ + uint low,high; + PPRO_CTR_READ(low, high, msrs, 0); + PPRO_CTR_SET_ACTIVE(low); + PPRO_CTR_WRITE(low, msrs, 0); +} + +static void ppro_stop(struct op_msrs const * const msrs) +{ + uint low,high; + PPRO_CTR_READ(low, high, msrs, 0); + PPRO_CTR_SET_INACTIVE(low); + PPRO_CTR_WRITE(low, msrs, 0); +} + + +struct op_x86_model_spec const op_ppro_spec = { + num_counters: PPRO_NUM_COUNTERS, + num_controls: PPRO_NUM_CONTROLS, + fill_in_addresses: &ppro_fill_in_addresses, + setup_ctrs: &ppro_setup_ctrs, + check_ctrs: &ppro_check_ctrs, + start: &ppro_start, + stop: &ppro_stop +}; --- module/x86/op_nmi.c Sat Sep 7 14:19:39 2002 +++ module/x86/op_nmi.c Fri Sep 20 10:49:55 2002 @@ -14,173 +14,81 @@ #include "op_apic.h" #include "op_events.h" #include "op_util.h" +#include "op_x86_model.h" -/* the MSRs we need */ -static uint perfctr_msr[OP_MAX_COUNTERS]; -static uint eventsel_msr[OP_MAX_COUNTERS]; +static struct op_msrs cpu_msrs[OP_MAX_CPUS]; +static struct op_x86_model_spec const * model = NULL; -/* number of counters physically present */ -static uint op_nr_counters = 2; - -/* whether we enable for each counter (athlon) or globally (intel) */ -static int separate_running_bit; +static struct op_x86_model_spec const * get_model(void) +{ + if (! model) { + /* pick out our per-model function table */ + switch (sysctl.cpu_type) { + case CPU_ATHLON: + model = &op_athlon_spec; + break; + + default: + model = &op_ppro_spec; + break; + } + } + return model; +} /* ---------------- NMI handler ------------------ */ /* preempt: all things inside the interrupt handler are preempt safe : we * never reenable interrupt */ -static void op_check_ctr(uint cpu, struct pt_regs *regs, int ctr) -{ - ulong l,h; - get_perfctr(l, h, ctr); - if (ctr_overflowed(l)) { - op_do_profile(cpu, regs, ctr); - set_perfctr(oprof_data[cpu].ctr_count[ctr], ctr); - } -} - asmlinkage void op_do_nmi(struct pt_regs * regs) { - uint cpu = op_cpu_id(); - int i; + uint const cpu = op_cpu_id (); + struct op_msrs const * const msrs = &(cpu_msrs[cpu]); - for (i = 0 ; i < op_nr_counters ; ++i) - op_check_ctr(cpu, regs, i); + /* assume model is set by now */ + model->check_ctrs (cpu, msrs, regs); } /* ---------------- PMC setup ------------------ */ -static void pmc_fill_in(uint *val, u8 kernel, u8 user, u8 event, u8 um) +static int pmc_setup_ctr(void *dummy) { - /* enable interrupt generation */ - *val |= (1<<20); - /* enable/disable chosen OS and USR counting */ - (user) ? (*val |= (1<<16)) - : (*val &= ~(1<<16)); - - (kernel) ? (*val |= (1<<17)) - : (*val &= ~(1<<17)); - - /* what are we counting ? */ - *val |= event; - *val |= (um<<8); -} - -static void pmc_setup(void *dummy) -{ - uint low, high; - int i; - - /* IA Vol. 3 Figure 15-3 */ - - /* Stop and clear all counter: IA32 use bit 22 of eventsel_msr0 to - * enable/disable all counter, AMD use separate bit 22 in each msr, - * all bits are cleared except the reserved bits 21 */ - for (i = 0 ; i < op_nr_counters ; ++i) { - rdmsr(eventsel_msr[i], low, high); - wrmsr(eventsel_msr[i], low & (1 << 21), high); + uint const cpu = op_cpu_id (); + struct op_msrs const * const msrs = &(cpu_msrs[cpu]); - /* avoid a false detection of ctr overflow in NMI handler */ - wrmsr(perfctr_msr[i], -1, -1); - } - - /* setup each counter */ - for (i = 0 ; i < op_nr_counters ; ++i) { - if (sysctl.ctr[i].event) { - rdmsr(eventsel_msr[i], low, high); - - low &= 1 << 21; /* do not touch the reserved bit */ - set_perfctr(sysctl.ctr[i].count, i); - - pmc_fill_in(&low, sysctl.ctr[i].kernel, sysctl.ctr[i].user, - sysctl.ctr[i].event, sysctl.ctr[i].unit_mask); - - wrmsr(eventsel_msr[i], low, high); - } - } - - /* Here all setup is made except the start/stop bit 22, counter - * disabled contains zeros in the eventsel msr except the reserved bit - * 21 */ + get_model()->setup_ctrs (msrs); + return 0; } static int pmc_setup_all(void) { - if ((smp_call_function(pmc_setup, NULL, 0, 1))) + if (smp_call_function(pmc_setup_ctr, NULL, 0, 1)) return -EFAULT; - - pmc_setup(NULL); + pmc_setup_ctr (NULL); return 0; } -inline static void pmc_start_P6(void) -{ - uint low,high; - - rdmsr(eventsel_msr[0], low, high); - wrmsr(eventsel_msr[0], low | (1 << 22), high); -} - -inline static void pmc_start_Athlon(void) -{ - uint low,high; - int i; - - for (i = 0 ; i < op_nr_counters ; ++i) { - if (sysctl.ctr[i].count) { - rdmsr(eventsel_msr[i], low, high); - wrmsr(eventsel_msr[i], low | (1 << 22), high); - } - } -} - static void pmc_start(void *info) { - if (info && (*((uint *)info) != op_cpu_id())) + uint const cpu = op_cpu_id (); + struct op_msrs const * const msrs = &(cpu_msrs[cpu]); + + if (info && (*((uint *)info) != cpu)) return; - /* assert: all enable counter are setup except the bit start/stop, - * all counter disable contains zeroes (except perhaps the reserved - * bit 21), counter disable contains -1 sign extended in msr count */ - - /* enable all needed counter */ - if (separate_running_bit == 0) - pmc_start_P6(); - else - pmc_start_Athlon(); + get_model()->start (msrs); } -inline static void pmc_stop_P6(void) -{ - uint low,high; - - rdmsr(eventsel_msr[0], low, high); - wrmsr(eventsel_msr[0], low & ~(1 << 22), high); -} - -inline static void pmc_stop_Athlon(void) -{ - uint low,high; - int i; - - for (i = 0 ; i < op_nr_counters ; ++i) { - if (sysctl.ctr[i].count) { - rdmsr(eventsel_msr[i], low, high); - wrmsr(eventsel_msr[i], low & ~(1 << 22), high); - } - } -} static void pmc_stop(void *info) { - if (info && (*((uint *)info) != op_cpu_id())) + uint const cpu = op_cpu_id (); + struct op_msrs const * const msrs = &(cpu_msrs[cpu]); + + if (info && (*((uint *)info) != cpu)) return; - /* disable counters */ - if (separate_running_bit == 0) - pmc_stop_P6(); - else - pmc_stop_Athlon(); + get_model()->stop (msrs); } static void pmc_select_start(uint cpu) @@ -206,7 +114,7 @@ for (cpu = 0 ; cpu < OP_MAX_CPUS; cpu++) { struct _oprof_data * data = &oprof_data[cpu]; - for (i = 0 ; i < op_nr_counters ; ++i) { + for (i = 0 ; i < get_model()->num_counters ; ++i) { if (sysctl.ctr[i].enabled) data->ctr_count[i] = sysctl.ctr[i].count; else @@ -231,11 +139,11 @@ int i; int enabled = 0; int ok = 0; - - for (i = 0; i < op_nr_counters ; i++) { + + for (i = 0; i < get_model()->num_counters; i++) { int min_count; int ret; - + if (!sysctl.ctr[i].enabled) continue; @@ -267,8 +175,8 @@ printk(KERN_ERR "oprofile: ctr%d: %d: can't count event for this counter\n", i, sysctl.ctr[i].event); } - - if (ret != OP_OK_EVENT) + + if (ret != OP_OK_EVENT) ok = -EINVAL; } @@ -280,50 +188,109 @@ return ok; } -static uint saved_perfctr_low[OP_MAX_COUNTERS]; -static uint saved_perfctr_high[OP_MAX_COUNTERS]; -static uint saved_eventsel_low[OP_MAX_COUNTERS]; -static uint saved_eventsel_high[OP_MAX_COUNTERS]; -static int pmc_init(void) +static int pmc_save_registers(void *dummy) { - int i; - int err = 0; - - if (sysctl.cpu_type == CPU_ATHLON) { - op_nr_counters = 4; - separate_running_bit = 1; + uint i; + uint const cpu = op_cpu_id (); + uint const n_ctrs = get_model()->num_counters; + uint const n_ctrls = get_model()->num_controls; + struct op_msrs * const msrs = &(cpu_msrs[cpu]); + + msrs->counters.addrs = NULL; + msrs->counters.saved = NULL; + msrs->controls.addrs = NULL; + msrs->controls.saved = NULL; + + msrs->counters.addrs = kmalloc (n_ctrs * + sizeof (uint), GFP_KERNEL); + if (! msrs->counters.addrs) + goto fault; + + msrs->counters.saved = kmalloc (n_ctrs * + sizeof (struct op_saved_msr), + GFP_KERNEL); + if (! msrs->counters.saved) + goto fault; + + msrs->controls.addrs = kmalloc (n_ctrls * sizeof (uint), + GFP_KERNEL); + if (! msrs->controls.addrs) + goto fault; + + msrs->counters.saved = kmalloc (n_ctrls * + sizeof (struct op_saved_msr), + GFP_KERNEL); + if (! msrs->controls.saved) + goto fault; + + model->fill_in_addresses (msrs); + + for (i = 0; i < n_ctrs; ++i) { + rdmsr (msrs->counters.addrs[i], + msrs->counters.saved[i].low, + msrs->counters.saved[i].high); + } + + for (i = 0; i < n_ctrls; ++i) { + rdmsr (msrs->controls.addrs[i], + msrs->controls.saved[i].low, + msrs->controls.saved[i].high); } + return 0; - /* let's use the right MSRs */ - switch (sysctl.cpu_type) { - case CPU_ATHLON: - eventsel_msr[0] = MSR_K7_PERFCTL0; - eventsel_msr[1] = MSR_K7_PERFCTL1; - eventsel_msr[2] = MSR_K7_PERFCTL2; - eventsel_msr[3] = MSR_K7_PERFCTL3; - perfctr_msr[0] = MSR_K7_PERFCTR0; - perfctr_msr[1] = MSR_K7_PERFCTR1; - perfctr_msr[2] = MSR_K7_PERFCTR2; - perfctr_msr[3] = MSR_K7_PERFCTR3; - break; - default: - eventsel_msr[0] = MSR_P6_EVNTSEL0; - eventsel_msr[1] = MSR_P6_EVNTSEL1; - perfctr_msr[0] = MSR_P6_PERFCTR0; - perfctr_msr[1] = MSR_P6_PERFCTR1; - break; - } + fault: + if (msrs->counters.addrs) + kfree (msrs->counters.addrs); + if (msrs->counters.saved) + kfree (msrs->counters.saved); + if (msrs->controls.addrs) + kfree (msrs->controls.addrs); + if (msrs->controls.saved) + kfree (msrs->controls.saved); + return -EFAULT; +} + +static int pmc_restore_registers(void *dummy) +{ + uint i; + uint const cpu = op_cpu_id (); + uint const n_ctrs = get_model()->num_counters; + uint const n_ctrls = get_model()->num_controls; + struct op_msrs * const msrs = &(cpu_msrs[cpu]); + + for (i = 0; i < n_ctrls; ++i) { + wrmsr (msrs->controls.addrs[i], + msrs->controls.saved[i].low, + msrs->controls.saved[i].high); + } + + for (i = 0; i < n_ctrs; ++i) { + wrmsr (msrs->counters.addrs[i], + msrs->counters.saved[i].low, + msrs->counters.saved[i].high); + } + + kfree (msrs->controls.saved); + kfree (msrs->controls.addrs); + kfree (msrs->counters.saved); + kfree (msrs->counters.addrs); + return 0; +} - for (i = 0 ; i < op_nr_counters ; ++i) { - rdmsr(eventsel_msr[i], saved_eventsel_low[i], saved_eventsel_high[i]); - rdmsr(perfctr_msr[i], saved_perfctr_low[i], saved_perfctr_high[i]); +static int pmc_init(void) +{ + int err = 0; + if ((err = smp_call_function(pmc_save_registers, NULL, 0, 1))) { + smp_call_function(pmc_restore_registers, NULL, 0, 1); + goto out; } - /* setup each counter */ + pmc_save_registers(NULL); + if ((err = apic_setup())) goto out; - + if ((err = smp_call_function(lvtpc_apic_setup, NULL, 0, 1))) { lvtpc_apic_restore(NULL); } @@ -334,17 +301,13 @@ static void pmc_deinit(void) { - int i; - smp_call_function(lvtpc_apic_restore, NULL, 0, 1); lvtpc_apic_restore(NULL); - for (i = 0 ; i < op_nr_counters ; ++i) { - wrmsr(eventsel_msr[i], saved_eventsel_low[i], saved_eventsel_high[i]); - wrmsr(perfctr_msr[i], saved_perfctr_low[i], saved_perfctr_high[i]); - } - apic_restore(); + + smp_call_function(pmc_restore_registers, NULL, 0, 1); + pmc_restore_registers(NULL); } static char *names[] = { "0", "1", "2", "3", "4", }; @@ -355,7 +318,8 @@ ctl_table * tab; int i, j; - for (i=0; i < op_nr_counters; i++) { + /* now init the sysctls */ + for (i=0; i < get_model()->num_counters; i++) { next->ctl_name = 1; next->procname = names[i]; next->mode = 0755; @@ -389,8 +353,7 @@ static void pmc_remove_sysctls(ctl_table * next) { int i; - - for (i=0; i < op_nr_counters; i++) { + for (i=0; i < get_model()->num_counters; i++) { kfree(next->child); next++; } --- module/x86/op_x86_model.h Wed Dec 31 19:00:00 1969 +++ module/x86/op_x86_model.h Fri Sep 20 10:49:34 2002 @@ -0,0 +1,48 @@ +#ifndef OP_X86_MODEL_H +#define OP_X86_MODEL_H + +#include "oprofile.h" + +/** + * @file op_x86_model.h + * interface to x86 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + */ + +struct op_saved_msr { + uint high; + uint low; +}; + +struct op_msr_group { + uint * addrs; + struct op_saved_msr * saved; +}; + +struct op_msrs { + struct op_msr_group counters; + struct op_msr_group controls; +}; + +struct pt_regs; + +struct op_x86_model_spec { + uint const num_counters; + uint const num_controls; + void (* fill_in_addresses)(struct op_msrs * const msrs); + void (* setup_ctrs)(struct op_msrs const * const msrs); + void (* check_ctrs)(uint const cpu, + struct op_msrs const * const msrs, + struct pt_regs * const regs); + void (* start)(struct op_msrs const * const msrs); + void (* stop)(struct op_msrs const * const msrs); +}; + +extern struct op_x86_model_spec const op_ppro_spec; +extern struct op_x86_model_spec const op_athlon_spec; + +#endif /* OP_X86_MODEL_H */ |
From: William C. <wc...@nc...> - 2002-09-20 19:25:25
|
module/x86/op_model_ppro.c is missing a: #include "op_msr.h" I will try it out on the p3 and athlon machines. -Will gr...@re... wrote: > hi, > > attached is another go at the model-specific x86 backend cleanup > patch, in further preparation for landing the P4 patch. it applies to > the cvs head, builds and runs on my P4 with RTC mode; but that's not > saying much. If someone with a ppro / athlon could confirm the changes > did not disrupt those backends I'd appreciate it. there is some chance > of that, with the moving-around of things. > > differences from last round of this patch: > > - some more constification > - fixed some spacing / formatting > - removed model-fetch call from within NMI, assume it's set > - move MSR addresses to contiguous arrays to remain more > cache friendly during frequent NMIs > > any further comments / suggestions? > > -graydon |
From: <gr...@re...> - 2002-09-20 19:52:02
|
At Fri, 20 Sep 2002 14:34:20 -0400, William Cohen wrote: > module/x86/op_model_ppro.c is missing a: > > #include "op_msr.h" Ha! nice catch. it *is* missing, but I wondered how it could possibly compile without that. it seems that /usr/src/linux-2.4.19/include/asm/msr.h includes these lines too: #define MSR_P6_PERFCTR0 0xc1 #define MSR_P6_PERFCTR1 0xc2 #define MSR_P6_EVNTSEL0 0x186 #define MSR_P6_EVNTSEL1 0x187 should we change the #defines we're using, or should I just put in the apporiate #include "op_msr.h" and have it not-override the kernel definitions? -graydon |
From: Philippe E. <ph...@wa...> - 2002-09-20 20:34:43
|
gr...@re... wrote: > At Fri, 20 Sep 2002 14:34:20 -0400, > William Cohen wrote: > > >>module/x86/op_model_ppro.c is missing a: >> >>#include "op_msr.h" > > > Ha! nice catch. it *is* missing, but I wondered how it could possibly > compile without that. it seems that > > /usr/src/linux-2.4.19/include/asm/msr.h > > includes these lines too: > > #define MSR_P6_PERFCTR0 0xc1 > #define MSR_P6_PERFCTR1 0xc2 > #define MSR_P6_EVNTSEL0 0x186 > #define MSR_P6_EVNTSEL1 0x187 > > should we change the #defines we're using, or should I just put in the > apporiate #include "op_msr.h" and have it not-override the kernel > definitions? > just include op_msr.h, all #defines in this file are on the form: #ifndef MSR_P6_PERFCTR0 #define MSR_P6_PERFCTR0 0xc1 #endif It's less annoying to reuse the same name I'll take care about compatibility with previous kernel version (but only on un-patched kernel) regardss, Phil |
From: William C. <wc...@nc...> - 2002-09-20 20:39:15
|
One of the concerns is being able to compile this on older kernels. The 2.4.18 kernel doesn't appear to have the MSR_P6_* defined. The code needs to compile on earlier versions of the kernel. If the values are the same (they should be), I would leave the names the same. I tried the patch on P3 and athlon and driver didn't work. :( I verified that the unpatched version of oprofile I used worked, so there is something in the patch that isn't right. I don't know what right now. -Will gr...@re... wrote: > At Fri, 20 Sep 2002 14:34:20 -0400, > William Cohen wrote: > > >>module/x86/op_model_ppro.c is missing a: >> >>#include "op_msr.h" > > > Ha! nice catch. it *is* missing, but I wondered how it could possibly > compile without that. it seems that > > /usr/src/linux-2.4.19/include/asm/msr.h > > includes these lines too: > > #define MSR_P6_PERFCTR0 0xc1 > #define MSR_P6_PERFCTR1 0xc2 > #define MSR_P6_EVNTSEL0 0x186 > #define MSR_P6_EVNTSEL1 0x187 > > should we change the #defines we're using, or should I just put in the > apporiate #include "op_msr.h" and have it not-override the kernel > definitions? > > -graydon > |
From: Philippe E. <ph...@wa...> - 2002-09-20 22:03:00
|
William Cohen wrote: > One of the concerns is being able to compile this on older kernels. The > 2.4.18 kernel doesn't appear to have the MSR_P6_* defined. The code > needs to compile on earlier versions of the kernel. If the values are > the same (they should be), I would leave the names the same. that's handled by op_msr.h, anyway don't take care for now for backward compatiblity, I have the necessary stuff to check that and I'll provide correction, if necessary, later. > > I tried the patch on P3 and athlon and driver didn't work. :( I verified > that the unpatched version of oprofile I used worked, so there is > something in the patch that isn't right. I don't know what right now. me too, if athlon doesn't work its probably a problem of initialisation of msr number problem. I got an msr number == 2 for the first control register on PIII. See my other mail for details. I'm unsure than Graydon can test this patch on PIII/Athlon. Graydon ? regards, Phil |
From: Philippe E. <ph...@wa...> - 2002-09-20 21:56:25
|
gr...@re... wrote: > hi, > > attached is another go at the model-specific x86 backend cleanup > patch, in further preparation for landing the P4 patch. it applies to > the cvs head, builds and runs on my P4 with RTC mode; but that's not > saying much. If someone with a ppro / athlon could confirm the changes > did not disrupt those backends I'd appreciate it. there is some chance > of that, with the moving-around of things. > > differences from last round of this patch: > > - some more constification > - fixed some spacing / formatting > - removed model-fetch call from within NMI, assume it's set > - move MSR addresses to contiguous arrays to remain more > cache friendly during frequent NMIs > > any further comments / suggestions? > > --- module/x86/op_nmi.c Sat Sep 7 14:19:39 2002 > +++ module/x86/op_nmi.c Fri Sep 20 10:49:55 2002 [...] > +static int pmc_init(void) > +{ > + int err = 0; > + if ((err = smp_call_function(pmc_save_registers, NULL, 0, 1))) { > + smp_call_function(pmc_restore_registers, NULL, 0, 1); don't restore: if save fail, save can be done only partially and anyway we don't change anything in msr. But I think it doesn't explain my problem at module load: general protection fault: 0000 CPU: 0 EIP: 0010:[<d8961b95>] Not tainted EFLAGS: 00010246 eax: cb0cd520 ebx: 00000000 ecx: 00000002 edx: d8962980 esi: 00000000 edi: cb0cd520 ebp: c142e320 esp: c2953efc ds: 0018 es: 0018 ss: 0018 Process oprofiled (pid: 5006, stackpage=c2953000) Stack: d89653fc 00000000 d89658c0 c142e320 d89658c0 cb0cd520 d8960dbb d89659c0 d8960dcb 00000000 d89600d6 00000000 c26ce6e0 cb924bc0 d895fd6b 00000000 c0130882 cb924bc0 c26ce6e0 c26ce6e0 cb924bc0 ffffffe9 c012f955 cb924bc0 Call Trace: [<d89653fc>] [<d89658c0>] [<d89658c0>] [<d8960dbb>] [<d89659c0>] [<d8960dcb>] [<d89600d6>] [<d895fd6b>] [<c0130882>] [<c012f955>] [<c012f86a>] [<c012fb9a>] [<c010853f>] Code: 0f 32 25 00 00 20 00 0f 30 43 83 fb 01 7e ee 8b 44 24 1c 31 00002b1c <ppro_setup_ctrs>: 2b1c: 83 ec 08 sub $0x8,%esp 2b1f: 55 push %ebp 2b20: 57 push %edi 2b21: 56 push %esi 2b22: 53 push %ebx 2b23: 8b 44 24 1c mov 0x1c(%esp,1),%eax 2b27: 31 db xor %ebx,%ebx 2b29: 8b 40 08 mov 0x8(%eax),%eax 2b2c: 89 44 24 14 mov %eax,0x14(%esp,1) 2b30: 89 c7 mov %eax,%edi 2b32: 8b 0c 9f mov (%edi,%ebx,4),%ecx --> 2b35: 0f 32 rdmsr 2b37: 25 00 00 20 00 and $0x200000,%eax 2b3c: 0f 30 wrmsr 2b3e: 43 inc %ebx 2b3f: 83 fb 01 cmp $0x1,%ebx 2b42: 7e ee jle 2b32static void ppro_setup_ctrs(struct op_msrs const * const msrs) { uint low, high; int i; /* clear all counters */ for (i = 0 ; i < PPRO_NUM_CONTROLS; ++i) { --> PPRO_CTRL_READ (low, high, msrs, i); i == %ebx = 0, msr number == %ecx = 2 ?? It can occur if init is not done but I don't notice any obvious error in code. regards, Phil |
From: Philippe E. <ph...@wa...> - 2002-09-20 23:19:25
|
gr...@re... wrote: ok catch it: conjunction of a typo and thinko > > -static int pmc_init(void) > +static int pmc_save_registers(void *dummy) +static void see later > { > - int i; [...] > + > + msrs->counters.saved = kmalloc (n_ctrls * > + sizeof (struct op_saved_msr), > + GFP_KERNEL); > + if (! msrs->controls.saved) > + goto fault; typo: if (!msrs->counters.saved) so we returned -EFAULT in all case but ... > +static int pmc_init(void) > +{ > + int err = 0; > + if ((err = smp_call_function(pmc_save_registers, NULL, 0, 1))) { > + smp_call_function(pmc_restore_registers, NULL, 0, 1); I already point to not restore if save fail in previous mail. The second problem here is the err returned by sm_call_function(); extern int smp_call_function (void (*func) (void *info), void *info, int retry, int wait); you can't return en error from the called function, the return code is probably something like (only smp) failure to delivery the function to all cpus. That's compile on UP because smp_call_funtion() is a macro on UP :/ So init was failed silently then we try to use incorrect msr number, boom ... **** Now another problem (yeps ...) $ op_start .. ; op_dump ; op_time $ no samples files found (try running op_dump ?) oprofiled.log verbose mode: blha blah... Read buffer of 0 entries. but that's an another story ;) the module can be loaded/unloaded now so it will more easy to catch this once. regards, Phil |
From: Philippe E. <ph...@wa...> - 2002-09-21 01:56:28
Attachments:
oprof_msr.diff
|
gr...@re... wrote: > hi, hello, the final try (I hope) I prefer attach the new diff including the previous fix. ***** op_time --> no samples files cat /proc/sys/dev/oprofile/nr_interrupts 0 > +static void ppro_start(struct op_msrs const * const msrs) > +{ > + uint low,high; > + PPRO_CTR_READ(low, high, msrs, 0); > + PPRO_CTR_SET_ACTIVE(low); > + PPRO_CTR_WRITE(low, msrs, 0); PPRO_CTR --> PPRO_CTRL, modifying the macro name for SET_ACTIVE etc... ditto for ppro_stop / athlon_start / athlon_stop ATHLON specific part not tested, can someone test ? If you get problem look athlon_start/athlon_stop. We need also a compile with a SMP kernel to catch if it exist other smp_call_function type mismatch. **** the patch is not consistent about spacing: PPRO_CTR_WRITE(sysctl.ctr[i].count, msrs, i); PPRO_CTRL_READ (low, high, msrs, i); > + kfree (msrs->controls.saved); > + kfree (msrs->controls.addrs); > + kfree (msrs->counters.saved); > + kfree (msrs->counters.addrs); what about removing all space between macro name and (, ditto for functions ? ***** btw op_model_xxx.c are 120/130 lines and a lot of cleaner than the old. It's really more easy now to debug the specific model stuff. regards, Phil |
From: Philippe E. <ph...@wa...> - 2002-09-21 02:37:26
Attachments:
oprof_msr.diff
|
sorry the last diff was missing the new file, this once apply cleanly against cvs and compile/work with 2.4.19/PIII regards, Phil |
From: <gr...@re...> - 2002-09-21 18:42:34
|
At Sat, 21 Sep 2002 04:44:34 +0200, Philippe Elie wrote: > sorry the last diff was missing the new file, this once > apply cleanly against cvs and compile/work with 2.4.19/PIII I just applied this and confirmed that with your changes it works on athlon, as well as P4-using-RTC. I think it's good now. thanks a lot for the fixes. I went out to get some food and when I got back in you had it all done! -graydon |