From: Jason Y. <jas...@am...> - 2008-05-20 18:35:58
|
hi, These emails contain the multiplexing patch for the Oprofile kernel module. It basically adds a new function pointer in struct oprofile_operations, allowing each architecture to supply its callback to switch between different sets of events when the timer expires. Userspace tools can modify the time slice through /dev/oprofile/time_slice. It also modifies the number of counters exposed to userspace through /dev/oprofile. For example, the number of counters for AMD CPUs is changed to 32, multiplexed in sets of 4. Please review it if you have a chance. Thanks. Jason |
From: Jason Y. <jas...@am...> - 2008-05-20 18:36:28
|
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cc48d3f..0341721 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -80,6 +80,26 @@ static void exit_sysfs(void) #define exit_sysfs() do { } while (0) #endif /* CONFIG_PM */ +static void nmi_cpu_switch(void *dummy) +{ + struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + model->switch_ctrs(msrs); +} + +static int nmi_switch_event(void) +{ + /* Check CPU 0 should be sufficient */ + struct op_msrs const *msrs = &per_cpu(cpu_msrs, 0); + + if (model->check_multiplexing(msrs) < 0) + return -EINVAL; + + spin_lock(&oprofilefs_lock); + on_each_cpu(nmi_cpu_switch, NULL, 0, 1); + spin_unlock(&oprofilefs_lock); + return 0; +} + static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -326,6 +346,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + counter_config[i].save_count_low = 0; } return 0; @@ -455,6 +476,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; + ops->switch_events = nmi_switch_event; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 2880b15..786d6e0 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,13 +10,14 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. 
*/ struct op_counter_config { unsigned long count; + unsigned long save_count_low; unsigned long enabled; unsigned long event; unsigned long kernel; diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 3d53487..0fdf179 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -11,6 +11,7 @@ */ #include <linux/oprofile.h> +#include <linux/percpu.h> #include <asm/ptrace.h> #include <asm/msr.h> #include <asm/nmi.h> @@ -18,8 +19,10 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 4 -#define NUM_CONTROLS 4 +#define NUM_COUNTERS 32 +#define NUM_HARDWARE_COUNTERS 4 +#define NUM_CONTROLS 32 +#define NUM_HARDWARE_CONTROLS 4 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) @@ -43,21 +46,25 @@ #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) static unsigned long reset_value[NUM_COUNTERS]; +DEFINE_PER_CPU(int, switch_index); +DEFINE_SPINLOCK(perfctr_lock); static void athlon_fill_in_addresses(struct op_msrs * const msrs) { int i; for (i = 0; i < NUM_COUNTERS; i++) { - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + int hw_counter = i % NUM_HARDWARE_COUNTERS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + hw_counter)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + hw_counter; else msrs->counters[i].addr = 0; } for (i = 0; i < NUM_CONTROLS; i++) { - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + int hw_control = i % NUM_HARDWARE_CONTROLS; + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + hw_control)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + hw_control; else msrs->controls[i].addr = 0; } @@ -69,8 +76,15 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) unsigned int low, high; int i; + for (i = 0; i < NUM_COUNTERS; ++i) { + if (counter_config[i].enabled) + 
reset_value[i] = counter_config[i].count; + else + reset_value[i] = 0; + } + /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0 ; i < NUM_HARDWARE_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; CTRL_READ(low, high, msrs, i); @@ -80,14 +94,14 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) } /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; @@ -106,29 +120,47 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) CTRL_SET_GUEST_ONLY(high, 0); CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; } } } +/* + * Quick check to see if multiplexing is necessary. + * The check should be efficient since counters are used + * in ordre. 
+ */ +static int athlon_check_multiplexing(struct op_msrs const * const msrs) +{ + int ret = 0; + + if (!counter_config[NUM_HARDWARE_COUNTERS].count) + ret = -EINVAL; + + return ret; +} + + static int athlon_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) + spin_lock(&perfctr_lock); + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (!reset_value[offset]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); + oprofile_add_sample(regs, offset); + CTR_WRITE(reset_value[offset], msrs, i); } } + spin_unlock(&perfctr_lock); + /* See op_model_ppro.c */ return 1; } @@ -138,13 +170,14 @@ static void athlon_start(struct op_msrs const * const msrs) { unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); CTRL_WRITE(low, high, msrs, i); } } + __get_cpu_var(switch_index) = 0; } @@ -153,26 +186,85 @@ static void athlon_stop(struct op_msrs const * const msrs) unsigned int low, high; int i; + spin_lock(&perfctr_lock); + /* Subtle: stop on all counters to avoid race with * setting our pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (!reset_value[i]) + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); CTRL_WRITE(low, high, msrs, i); } + + spin_unlock(&perfctr_lock); } + +static void athlon_switch_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i, s = per_cpu(switch_index, smp_processor_id()); + + athlon_stop(msrs); + + spin_lock(&perfctr_lock); + /* save the current hw counts */ + for (i = 0; i < NUM_HARDWARE_COUNTERS; 
++i) { + int offset = i + s; + if (!reset_value[offset]) + continue; + CTR_READ(low, high, msrs, i); + /* convert counter value to actual count, assume high = -1 */ + counter_config[offset].save_count_low = (unsigned int)-1 - low - 1; + } + + + /* move to next eventset */ + s += NUM_HARDWARE_COUNTERS; + if ((s > NUM_HARDWARE_COUNTERS) || (counter_config[s].count == 0)) { + per_cpu(switch_index, smp_processor_id()) = 0; + s = 0; + } else + per_cpu(switch_index, smp_processor_id()) = s; + + /* enable next active counters */ + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { + int offset = i + s; + if ((counter_config[offset].enabled) && (CTR_IS_RESERVED(msrs,i))) { + if (unlikely(!counter_config[offset].save_count_low)) + counter_config[offset].save_count_low = counter_config[offset].count; + CTR_WRITE(counter_config[offset].save_count_low, msrs, i); + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR_LO(low); + CTRL_CLEAR_HI(high); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[offset].user); + CTRL_SET_KERN(low, counter_config[offset].kernel); + CTRL_SET_UM(low, counter_config[offset].unit_mask); + CTRL_SET_EVENT_LOW(low, counter_config[offset].event); + CTRL_SET_EVENT_HIGH(high, counter_config[offset].event); + CTRL_SET_HOST_ONLY(high, 0); + CTRL_SET_GUEST_ONLY(high, 0); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } + + spin_unlock(&perfctr_lock); +} + + static void athlon_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -186,5 +278,7 @@ struct op_x86_model_spec const op_athlon_spec = { .check_ctrs = &athlon_check_ctrs, .start = &athlon_start, .stop = &athlon_stop, - .shutdown = &athlon_shutdown + .shutdown = 
&athlon_shutdown, + .switch_ctrs = &athlon_switch_ctrs, + .check_multiplexing = &athlon_check_multiplexing }; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 45b605f..45003c2 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -41,6 +41,8 @@ struct op_x86_model_spec { void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); void (*shutdown)(struct op_msrs const * const msrs); + void (*switch_ctrs)(struct op_msrs const * const msrs); + int (*check_multiplexing)(struct op_msrs const * const msrs); }; extern struct op_x86_model_spec const op_ppro_spec; diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 2c64517..3ebd8d2 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/oprofile.h> #include <linux/moduleparam.h> +#include <linux/timer.h> #include <asm/mutex.h> #include "oprof.h" @@ -24,6 +25,9 @@ struct oprofile_operations oprofile_ops; unsigned long oprofile_started; unsigned long backtrace_depth; +/* Multiplexing defaults at 5000 useconds */ +unsigned long time_slice = 5 * USEC_PER_SEC ; +struct timer_list switch_timer; static unsigned long is_setup; static DEFINE_MUTEX(start_mutex); @@ -87,6 +91,17 @@ out: return err; } +static void start_switch_timer(void) +{ + switch_timer.expires = jiffies + usecs_to_jiffies(time_slice); + add_timer(&switch_timer); +} + +static void switch_interrupt(unsigned long ptr) +{ + if (!oprofile_ops.switch_events()) + start_switch_timer(); +} /* Actually start profiling (echo 1>/dev/oprofile/enable) */ int oprofile_start(void) @@ -94,7 +109,6 @@ int oprofile_start(void) int err = -EINVAL; mutex_lock(&start_mutex); - if (!is_setup) goto out; @@ -108,6 +122,9 @@ int oprofile_start(void) if ((err = oprofile_ops.start())) goto out; + if (oprofile_ops.switch_events) + start_switch_timer(); + oprofile_started = 1; out: 
mutex_unlock(&start_mutex); @@ -123,6 +140,7 @@ void oprofile_stop(void) goto out; oprofile_ops.stop(); oprofile_started = 0; + del_timer_sync(&switch_timer); /* wake up the daemon to read what remains */ wake_up_buffer_waiter(); out: @@ -155,6 +173,29 @@ post_sync: mutex_unlock(&start_mutex); } +int oprofile_set_time_slice(unsigned long val) +{ + int err = 0; + + mutex_lock(&start_mutex); + + if (oprofile_started) { + err = -EBUSY; + goto out; + } + + if (!oprofile_ops.switch_events) { + err = -EINVAL; + goto out; + } + + time_slice = val; + +out: + mutex_unlock(&start_mutex); + return err; + +} int oprofile_set_backtrace(unsigned long val) { @@ -179,10 +220,18 @@ out: return err; } +static void __init oprofile_switch_timer_init(void) +{ + init_timer(&switch_timer); + switch_timer.function = switch_interrupt; + switch_timer.data = 0; +} + static int __init oprofile_init(void) { int err; + oprofile_switch_timer_init(); err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { @@ -191,8 +240,10 @@ static int __init oprofile_init(void) } err = oprofilefs_register(); - if (err) + if (err) { + del_timer_sync(&switch_timer); oprofile_arch_exit(); + } return err; } diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 1832365..fc3a2bd 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -27,7 +27,8 @@ extern unsigned long fs_buffer_watershed; extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; extern unsigned long backtrace_depth; - +extern unsigned long time_slice; + struct super_block; struct dentry; @@ -35,5 +36,6 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root); void oprofile_timer_init(struct oprofile_operations * ops); int oprofile_set_backtrace(unsigned long depth); +int oprofile_set_time_slice(unsigned long time); #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index ef953ba..25c78dd 100644 --- 
a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -18,6 +18,37 @@ unsigned long fs_buffer_size = 131072; unsigned long fs_cpu_buffer_size = 8192; unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +static ssize_t time_slice_read(struct file * file, char __user * buf, size_t count, loff_t * offset) +{ + return oprofilefs_ulong_to_user(time_slice, buf, count, offset); +} + + +static ssize_t time_slice_write(struct file * file, char const __user * buf, size_t count, loff_t * offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + retval = oprofile_set_time_slice(val); + + if (retval) + return retval; + return count; +} + +static const struct file_operations time_slice_fops = { + .read = time_slice_read, + .write = time_slice_write +}; + + static ssize_t depth_read(struct file * file, char __user * buf, size_t count, loff_t * offset) { return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); @@ -85,11 +116,10 @@ static ssize_t enable_write(struct file * file, char const __user * buf, size_t if (*offset) return -EINVAL; - retval = oprofilefs_ulong_from_user(&val, buf, count); if (retval) return retval; - + if (val) retval = oprofile_start(); else @@ -129,6 +159,7 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root) oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); + oprofilefs_create_file(sb, root, "time_slice", &time_slice_fops); oprofile_create_stats_files(sb, root); if (oprofile_ops.create_files) oprofile_ops.create_files(sb, root); diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 041bb31..6c764cb 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -65,6 +65,9 @@ struct oprofile_operations 
{ /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); + + /* Multiplex between different events. Optioinal. */ + int (*switch_events)(void); /* CPU identification string. */ char * cpu_type; }; |
From: John L. <le...@mo...> - 2008-05-23 14:16:07
|
On Tue, May 20, 2008 at 01:35:27PM -0500, Jason Yeh wrote: > These emails contain the multiplexing patch for the Oprofile kernel > module. It basically adds a new function pointer in oprofile_operator > allowing each architecture to supply its callback to switch between > different sets of event when the timer expires. Userspace tools can > modify the time slice through /dev/oprofile/time_slice. I took a very quick look at the kernel patch and it looks good to me. regards john |
From: Jason Y. <jas...@am...> - 2008-05-23 15:42:20
|
John, Thanks for taking a quick look. I discovered locking state inconsistency when turning on spin lock debugging options and made changes to avoid it. Instead of using timer_list, the code uses workqueue to swap out the events. I will send the patch to LKML if there are no other comments. Thanks. Jason --- diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cc48d3f..fdd1fd2 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -80,6 +80,24 @@ static void exit_sysfs(void) #define exit_sysfs() do { } while (0) #endif /* CONFIG_PM */ +static void nmi_cpu_switch(void *dummy) +{ + struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + model->switch_ctrs(msrs); +} + +static int nmi_switch_event(void) +{ + /* Check CPU 0 should be sufficient */ + struct op_msrs const *msrs = &per_cpu(cpu_msrs, 0); + + if (model->check_multiplexing(msrs) < 0) + return -EINVAL; + + on_each_cpu(nmi_cpu_switch, NULL, 0, 1); + return 0; +} + static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -326,6 +344,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + counter_config[i].save_count_low = 0; } return 0; @@ -455,6 +474,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; + ops->switch_events = nmi_switch_event; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 2880b15..786d6e0 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,13 +10,14 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr 
configuration as set via * oprofilefs. */ struct op_counter_config { unsigned long count; + unsigned long save_count_low; unsigned long enabled; unsigned long event; unsigned long kernel; diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 3d53487..2684683 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -11,6 +11,7 @@ */ #include <linux/oprofile.h> +#include <linux/percpu.h> #include <asm/ptrace.h> #include <asm/msr.h> #include <asm/nmi.h> @@ -18,8 +19,10 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 4 -#define NUM_CONTROLS 4 +#define NUM_COUNTERS 32 +#define NUM_HARDWARE_COUNTERS 4 +#define NUM_CONTROLS 32 +#define NUM_HARDWARE_CONTROLS 4 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) @@ -43,21 +46,24 @@ #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) static unsigned long reset_value[NUM_COUNTERS]; +DEFINE_PER_CPU(int, switch_index); static void athlon_fill_in_addresses(struct op_msrs * const msrs) { int i; for (i = 0; i < NUM_COUNTERS; i++) { - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + int hw_counter = i % NUM_HARDWARE_COUNTERS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + hw_counter)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + hw_counter; else msrs->counters[i].addr = 0; } for (i = 0; i < NUM_CONTROLS; i++) { - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + int hw_control = i % NUM_HARDWARE_CONTROLS; + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + hw_control)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + hw_control; else msrs->controls[i].addr = 0; } @@ -69,8 +75,15 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) unsigned int low, high; int i; + for (i = 0; i < NUM_COUNTERS; ++i) { + if 
(counter_config[i].enabled) + reset_value[i] = counter_config[i].count; + else + reset_value[i] = 0; + } + /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0 ; i < NUM_HARDWARE_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; CTRL_READ(low, high, msrs, i); @@ -80,14 +93,14 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) } /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; @@ -106,26 +119,41 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) CTRL_SET_GUEST_ONLY(high, 0); CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; } } } +/* + * Quick check to see if multiplexing is necessary. + * The check should be efficient since counters are used + * in ordre. 
+ */ +static int athlon_check_multiplexing(struct op_msrs const * const msrs) +{ + int ret = 0; + + if (!counter_config[NUM_HARDWARE_COUNTERS].count) + ret = -EINVAL; + + return ret; +} + + static int athlon_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (!reset_value[offset]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); + oprofile_add_sample(regs, offset); + CTR_WRITE(reset_value[offset], msrs, i); } } @@ -138,13 +166,14 @@ static void athlon_start(struct op_msrs const * const msrs) { unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); CTRL_WRITE(low, high, msrs, i); } } + __get_cpu_var(switch_index) = 0; } @@ -155,8 +184,8 @@ static void athlon_stop(struct op_msrs const * const msrs) /* Subtle: stop on all counters to avoid race with * setting our pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (!reset_value[i]) + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); @@ -164,15 +193,67 @@ static void athlon_stop(struct op_msrs const * const msrs) } } + +static void athlon_switch_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i, s = per_cpu(switch_index, smp_processor_id()); + + athlon_stop(msrs); + + /* save the current hw counts */ + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { + int offset = i + s; + if (!reset_value[offset]) + continue; + CTR_READ(low, high, msrs, i); + /* convert counter value to actual count, assume high = -1 */ + 
counter_config[offset].save_count_low = (unsigned int)-1 - low - 1; + } + + + /* move to next eventset */ + s += NUM_HARDWARE_COUNTERS; + if ((s > NUM_HARDWARE_COUNTERS) || (counter_config[s].count == 0)) { + per_cpu(switch_index, smp_processor_id()) = 0; + s = 0; + } else + per_cpu(switch_index, smp_processor_id()) = s; + + /* enable next active counters */ + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { + int offset = i + s; + if ((counter_config[offset].enabled) && (CTR_IS_RESERVED(msrs,i))) { + if (unlikely(!counter_config[offset].save_count_low)) + counter_config[offset].save_count_low = counter_config[offset].count; + CTR_WRITE(counter_config[offset].save_count_low, msrs, i); + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR_LO(low); + CTRL_CLEAR_HI(high); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[offset].user); + CTRL_SET_KERN(low, counter_config[offset].kernel); + CTRL_SET_UM(low, counter_config[offset].unit_mask); + CTRL_SET_EVENT_LOW(low, counter_config[offset].event); + CTRL_SET_EVENT_HIGH(high, counter_config[offset].event); + CTRL_SET_HOST_ONLY(high, 0); + CTRL_SET_GUEST_ONLY(high, 0); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } +} + + static void athlon_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -186,5 +267,7 @@ struct op_x86_model_spec const op_athlon_spec = { .check_ctrs = &athlon_check_ctrs, .start = &athlon_start, .stop = &athlon_stop, - .shutdown = &athlon_shutdown + .shutdown = &athlon_shutdown, + .switch_ctrs = &athlon_switch_ctrs, + .check_multiplexing = &athlon_check_multiplexing }; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 
45b605f..45003c2 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -41,6 +41,8 @@ struct op_x86_model_spec { void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); void (*shutdown)(struct op_msrs const * const msrs); + void (*switch_ctrs)(struct op_msrs const * const msrs); + int (*check_multiplexing)(struct op_msrs const * const msrs); }; extern struct op_x86_model_spec const op_ppro_spec; diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 2c64517..ca5bd7b 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -12,6 +12,7 @@ #include <linux/init.h> #include <linux/oprofile.h> #include <linux/moduleparam.h> +#include <linux/workqueue.h> #include <asm/mutex.h> #include "oprof.h" @@ -24,6 +25,9 @@ struct oprofile_operations oprofile_ops; unsigned long oprofile_started; unsigned long backtrace_depth; +/* Multiplexing defaults at 5000 useconds */ +unsigned long time_slice = 5 * USEC_PER_SEC ; +struct delayed_work switch_work; static unsigned long is_setup; static DEFINE_MUTEX(start_mutex); @@ -87,6 +91,16 @@ out: return err; } +static void start_switch_worker(void) +{ + schedule_delayed_work(&switch_work, usecs_to_jiffies(time_slice)); +} + +static void switch_worker(unsigned long ptr) +{ + if (!oprofile_ops.switch_events()) + start_switch_worker(); +} /* Actually start profiling (echo 1>/dev/oprofile/enable) */ int oprofile_start(void) @@ -94,7 +108,6 @@ int oprofile_start(void) int err = -EINVAL; mutex_lock(&start_mutex); - if (!is_setup) goto out; @@ -108,6 +121,9 @@ int oprofile_start(void) if ((err = oprofile_ops.start())) goto out; + if (oprofile_ops.switch_events) + start_switch_worker(); + oprofile_started = 1; out: mutex_unlock(&start_mutex); @@ -123,6 +139,7 @@ void oprofile_stop(void) goto out; oprofile_ops.stop(); oprofile_started = 0; + cancel_delayed_work_sync(&switch_work); /* wake up the daemon to read what remains */ 
wake_up_buffer_waiter(); out: @@ -155,6 +172,29 @@ post_sync: mutex_unlock(&start_mutex); } +int oprofile_set_time_slice(unsigned long val) +{ + int err = 0; + + mutex_lock(&start_mutex); + + if (oprofile_started) { + err = -EBUSY; + goto out; + } + + if (!oprofile_ops.switch_events) { + err = -EINVAL; + goto out; + } + + time_slice = val; + +out: + mutex_unlock(&start_mutex); + return err; + +} int oprofile_set_backtrace(unsigned long val) { @@ -179,10 +219,16 @@ out: return err; } +static void __init oprofile_switch_timer_init(void) +{ + INIT_DELAYED_WORK(&switch_work, switch_worker); +} + static int __init oprofile_init(void) { int err; + oprofile_switch_timer_init(); err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 1832365..fc3a2bd 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -27,7 +27,8 @@ extern unsigned long fs_buffer_watershed; extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; extern unsigned long backtrace_depth; - +extern unsigned long time_slice; + struct super_block; struct dentry; @@ -35,5 +36,6 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root); void oprofile_timer_init(struct oprofile_operations * ops); int oprofile_set_backtrace(unsigned long depth); +int oprofile_set_time_slice(unsigned long time); #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index ef953ba..25c78dd 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -18,6 +18,37 @@ unsigned long fs_buffer_size = 131072; unsigned long fs_cpu_buffer_size = 8192; unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +static ssize_t time_slice_read(struct file * file, char __user * buf, size_t count, loff_t * offset) +{ + return oprofilefs_ulong_to_user(time_slice, buf, count, offset); +} + + +static ssize_t 
time_slice_write(struct file * file, char const __user * buf, size_t count, loff_t * offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + retval = oprofile_set_time_slice(val); + + if (retval) + return retval; + return count; +} + +static const struct file_operations time_slice_fops = { + .read = time_slice_read, + .write = time_slice_write +}; + + static ssize_t depth_read(struct file * file, char __user * buf, size_t count, loff_t * offset) { return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); @@ -85,11 +116,10 @@ static ssize_t enable_write(struct file * file, char const __user * buf, size_t if (*offset) return -EINVAL; - retval = oprofilefs_ulong_from_user(&val, buf, count); if (retval) return retval; - + if (val) retval = oprofile_start(); else @@ -129,6 +159,7 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root) oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); + oprofilefs_create_file(sb, root, "time_slice", &time_slice_fops); oprofile_create_stats_files(sb, root); if (oprofile_ops.create_files) oprofile_ops.create_files(sb, root); diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 041bb31..6c764cb 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -65,6 +65,9 @@ struct oprofile_operations { /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); + + /* Multiplex between different events. Optioinal. */ + int (*switch_events)(void); /* CPU identification string. */ char * cpu_type; }; |