From: Jason Y. <jas...@am...> - 2008-06-03 13:44:57
This is an updated patch to enable Oprofile module to switch between different sets of events at the user specified interval. It allows the module to gather more event statistics than the number of event counters on the hardware in a single run of profiling. A new file (/dev/oprofile/timeout_ms) is added for user to specify the interval. If the number of user specified events is more than the number of events counter on the hardware, the patch will schedule a delayed work and switch/re-writes the different sets of value into the events counter. The switching mechanism needs to be done for each architecture if it wishes to support this multiplexing scheme. Only AMD CPU is supported in this patch. Signed-off-by: Jason Yeh <jas...@am...> --- arch/x86/oprofile/nmi_int.c | 20 +++++ arch/x86/oprofile/op_counter.h | 3 arch/x86/oprofile/op_model_athlon.c | 123 +++++++++++++++++++++++++++++------- arch/x86/oprofile/op_x86_model.h | 2 drivers/oprofile/oprof.c | 57 +++++++++++++++- drivers/oprofile/oprof.h | 4 - drivers/oprofile/oprofile_files.c | 39 ++++++++++- include/linux/oprofile.h | 3 8 files changed, 223 insertions(+), 28 deletions(-) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cc48d3f..42fef97 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -80,6 +80,24 @@ static void exit_sysfs(void) #define exit_sysfs() do { } while (0) #endif /* CONFIG_PM */ +static void nmi_cpu_switch(void *dummy) +{ + struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + model->switch_ctrs(msrs); +} + +static int nmi_switch_event(void) +{ + /* Check CPU 0 should be sufficient */ + struct op_msrs const *msrs = &per_cpu(cpu_msrs, 0); + + if (model->check_multiplexing(msrs) < 0) + return -EINVAL; + + on_each_cpu(nmi_cpu_switch, NULL, 0, 1); + return 0; +} + static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -326,6 +344,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + counter_config[i].save_count_low = 0; } return 0; @@ -455,6 +474,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; + ops->switch_events = nmi_switch_event; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 2880b15..786d6e0 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,13 +10,14 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. 
*/ struct op_counter_config { unsigned long count; + unsigned long save_count_low; unsigned long enabled; unsigned long event; unsigned long kernel; diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 3d53487..4a09666 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -11,6 +11,7 @@ */ #include <linux/oprofile.h> +#include <linux/percpu.h> #include <asm/ptrace.h> #include <asm/msr.h> #include <asm/nmi.h> @@ -18,8 +19,10 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 4 -#define NUM_CONTROLS 4 +#define NUM_COUNTERS 32 +#define NUM_HARDWARE_COUNTERS 4 +#define NUM_CONTROLS 32 +#define NUM_HARDWARE_CONTROLS 4 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) @@ -43,21 +46,24 @@ #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) static unsigned long reset_value[NUM_COUNTERS]; +static DEFINE_PER_CPU(int, switch_index); static void athlon_fill_in_addresses(struct op_msrs * const msrs) { int i; for (i = 0; i < NUM_COUNTERS; i++) { - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + int hw_counter = i % NUM_HARDWARE_COUNTERS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + hw_counter)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + hw_counter; else msrs->counters[i].addr = 0; } for (i = 0; i < NUM_CONTROLS; i++) { - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + int hw_control = i % NUM_HARDWARE_CONTROLS; + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + hw_control)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + hw_control; else msrs->controls[i].addr = 0; } @@ -69,8 +75,15 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) unsigned int low, high; int i; + for (i = 0; i < NUM_COUNTERS; ++i) { + if (counter_config[i].enabled) + reset_value[i] = counter_config[i].count; + else + reset_value[i] = 0; + } + /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0 ; i < NUM_HARDWARE_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; CTRL_READ(low, high, msrs, i); @@ -80,14 +93,14 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) } /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; @@ -106,26 +119,36 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) CTRL_SET_GUEST_ONLY(high, 0); CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; } } } +/* + * Quick check to see if multiplexing is necessary. + * The check should be efficient since counters are used + * in ordre. + */ +static int athlon_check_multiplexing(struct op_msrs const * const msrs) +{ + return counter_config[NUM_HARDWARE_COUNTERS].count ? 
0 : -EINVAL; +} + + static int athlon_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (!reset_value[offset]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); + oprofile_add_sample(regs, offset); + CTR_WRITE(reset_value[offset], msrs, i); } } @@ -138,13 +161,14 @@ static void athlon_start(struct op_msrs const * const msrs) { unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); CTRL_WRITE(low, high, msrs, i); } } + __get_cpu_var(switch_index) = 0; } @@ -155,8 +179,8 @@ static void athlon_stop(struct op_msrs const * const msrs) /* Subtle: stop on all counters to avoid race with * setting our pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (!reset_value[i]) + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); @@ -164,15 +188,70 @@ static void athlon_stop(struct op_msrs const * const msrs) } } + +static void athlon_switch_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i, s = per_cpu(switch_index, smp_processor_id()); + + athlon_stop(msrs); + + /* save the current hw counts */ + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { + int offset = i + s; + if (!reset_value[offset]) + continue; + CTR_READ(low, high, msrs, i); + /* convert counter value to actual count, assume high = -1 */ + counter_config[offset].save_count_low = + (unsigned int) -1 - low - 1; + } + + /* move to next eventset */ + s += NUM_HARDWARE_COUNTERS; + if ((s > NUM_HARDWARE_COUNTERS) || (counter_config[s].count == 0)) { + per_cpu(switch_index, smp_processor_id()) = 0; + s = 0; + } else + per_cpu(switch_index, smp_processor_id()) = s; + + /* enable next active counters */ + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { + int offset = i + s; + if ((counter_config[offset].enabled) + && (CTR_IS_RESERVED(msrs, i))) { + if (unlikely(!counter_config[offset].save_count_low)) + counter_config[offset].save_count_low = + counter_config[offset].count; + CTR_WRITE(counter_config[offset].save_count_low, + msrs, i); + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR_LO(low); + CTRL_CLEAR_HI(high); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[offset].user); + CTRL_SET_KERN(low, counter_config[offset].kernel); + CTRL_SET_UM(low, counter_config[offset].unit_mask); + CTRL_SET_EVENT_LOW(low, counter_config[offset].event); + CTRL_SET_EVENT_HIGH(high, counter_config[offset].event); + CTRL_SET_HOST_ONLY(high, 0); + CTRL_SET_GUEST_ONLY(high, 0); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } +} + + static void athlon_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -186,5 +265,7 @@ struct op_x86_model_spec const op_athlon_spec = { .check_ctrs = &athlon_check_ctrs, .start = 
&athlon_start, .stop = &athlon_stop, - .shutdown = &athlon_shutdown + .shutdown = &athlon_shutdown, + .switch_ctrs = &athlon_switch_ctrs, + .check_multiplexing = &athlon_check_multiplexing }; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 45b605f..45003c2 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -41,6 +41,8 @@ struct op_x86_model_spec { void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); void (*shutdown)(struct op_msrs const * const msrs); + void (*switch_ctrs)(struct op_msrs const * const msrs); + int (*check_multiplexing)(struct op_msrs const * const msrs); }; extern struct op_x86_model_spec const op_ppro_spec; diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 2c64517..9385e1a 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -12,6 +12,8 @@ #include <linux/init.h> #include <linux/oprofile.h> #include <linux/moduleparam.h> +#include <linux/workqueue.h> +#include <linux/time.h> #include <asm/mutex.h> #include "oprof.h" @@ -19,13 +21,18 @@ #include "cpu_buffer.h" #include "buffer_sync.h" #include "oprofile_stats.h" + +static unsigned long is_setup; +static void switch_worker(struct work_struct *work); +static DECLARE_DELAYED_WORK(switch_work, switch_worker); +static DEFINE_MUTEX(start_mutex); struct oprofile_operations oprofile_ops; +unsigned long timeout_jiffies; unsigned long oprofile_started; unsigned long backtrace_depth; -static unsigned long is_setup; -static DEFINE_MUTEX(start_mutex); +/* Multiplexing defaults at 1 msec*/ /* timer 0 - use performance monitoring hardware if available @@ -87,6 +94,16 @@ out: return err; } +static void start_switch_worker(void) +{ + schedule_delayed_work(&switch_work, timeout_jiffies); +} + +static void switch_worker(struct work_struct *work) +{ + if (!oprofile_ops.switch_events()) + start_switch_worker(); +} /* Actually start profiling (echo 1>/dev/oprofile/enable) */ int oprofile_start(void) @@ -94,7 +111,6 @@ int oprofile_start(void) int err = -EINVAL; mutex_lock(&start_mutex); - if (!is_setup) goto out; @@ -108,6 +124,9 @@ int oprofile_start(void) if ((err = oprofile_ops.start())) goto out; + if (oprofile_ops.switch_events) + start_switch_worker(); + oprofile_started = 1; out: mutex_unlock(&start_mutex); @@ -123,6 +142,7 @@ void oprofile_stop(void) goto out; oprofile_ops.stop(); oprofile_started = 0; + cancel_delayed_work_sync(&switch_work); /* wake up the daemon to read what remains */ wake_up_buffer_waiter(); out: @@ -155,6 +175,31 @@ post_sync: mutex_unlock(&start_mutex); } +/* User inputs in ms, converts to jiffies */ +int oprofile_set_timeout(unsigned long val_msec) +{ + int err = 0; + + mutex_lock(&start_mutex); + + if (oprofile_started) { + err = -EBUSY; + goto out; + } + + if (!oprofile_ops.switch_events) { + err = -EINVAL; + goto out; + } + + if ((timeout_jiffies = msecs_to_jiffies(val_msec)) == MAX_JIFFY_OFFSET) + timeout_jiffies = msecs_to_jiffies(1); + +out: + mutex_unlock(&start_mutex); + return err; + +} int oprofile_set_backtrace(unsigned long val) { @@ -179,10 +224,16 @@ out: return err; } +static void __init oprofile_switch_timer_init(void) +{ + timeout_jiffies = msecs_to_jiffies(1); +} + static int __init oprofile_init(void) { int err; + oprofile_switch_timer_init(); err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 1832365..c4406a7 100644 --- a/drivers/oprofile/oprof.h +++ 
b/drivers/oprofile/oprof.h @@ -27,7 +27,8 @@ extern unsigned long fs_buffer_watershed; extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; extern unsigned long backtrace_depth; - +extern unsigned long timeout_jiffies; + struct super_block; struct dentry; @@ -35,5 +36,6 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root); void oprofile_timer_init(struct oprofile_operations * ops); int oprofile_set_backtrace(unsigned long depth); +int oprofile_set_timeout(unsigned long time); #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index ef953ba..cc4f5a1 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/oprofile.h> +#include <linux/jiffies.h> #include "event_buffer.h" #include "oprofile_stats.h" @@ -18,6 +19,40 @@ unsigned long fs_buffer_size = 131072; unsigned long fs_cpu_buffer_size = 8192; unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +static ssize_t timeout_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(jiffies_to_msecs(timeout_jiffies), + buf, count, offset); +} + + +static ssize_t timeout_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + retval = oprofile_set_timeout(val); + + if (retval) + return retval; + return count; +} + +static const struct file_operations timeout_fops = { + .read = timeout_read, + .write = timeout_write, +}; + + static ssize_t depth_read(struct file * file, char __user * buf, size_t count, loff_t * offset) { return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); @@ -85,11 +120,10 @@ static ssize_t enable_write(struct file * file, char const __user * buf, size_t if (*offset) return -EINVAL; - retval = oprofilefs_ulong_from_user(&val, buf, count); if (retval) return retval; - + if (val) retval = oprofile_start(); else @@ -129,6 +163,7 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root) oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); + oprofilefs_create_file(sb, root, "timeout_ms", &timeout_fops); oprofile_create_stats_files(sb, root); if (oprofile_ops.create_files) oprofile_ops.create_files(sb, root); diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 041bb31..71af056 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -65,6 +65,9 @@ struct oprofile_operations { /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); + + /* Multiplex between different events. Optional. */ + int (*switch_events)(void); /* CPU identification string. */ char * cpu_type; }; |
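As a usage note for the interface added above: the multiplexing interval is set by writing a millisecond value to the new timeout_ms file before profiling is started. Per oprofile_set_timeout() and timeout_write() in the patch, the value is converted with msecs_to_jiffies() and falls back to 1 ms if the conversion overflows; the write fails with -EBUSY once profiling has started and with -EINVAL if the loaded driver provides no switch_events hook. Below is a minimal userspace sketch, assuming oprofilefs is mounted at /dev/oprofile as in the description above (a plain shell echo works just as well); the file name set_mpx_interval.c is purely illustrative.

/* set_mpx_interval.c - illustrative only: set the multiplexing interval to 5 ms */
#include <stdio.h>
#include <string.h>
#include <errno.h>

int main(void)
{
	const char *path = "/dev/oprofile/timeout_ms";
	FILE *f = fopen(path, "w");

	if (!f) {
		fprintf(stderr, "open %s: %s\n", path, strerror(errno));
		return 1;
	}
	/* the kernel side parses this with oprofilefs_ulong_from_user() */
	fprintf(f, "5\n");
	if (fclose(f) != 0) {
		fprintf(stderr, "write %s: %s\n", path, strerror(errno));
		return 1;
	}
	return 0;
}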
From: Andrew M. <ak...@li...> - 2008-06-04 09:11:31
On Tue, 3 Jun 2008 08:44:35 -0500 Jason Yeh <jas...@am...> wrote:

> This is an updated patch to enable Oprofile module to switch between different
> sets of events at the user specified interval. It allows the module to gather
> more event statistics than the number of event counters on the hardware in a
> single run of profiling.
>
> A new file (/dev/oprofile/timeout_ms) is added for user to specify the interval.
> If the number of user specified events is more than the number of events counter
> on the hardware, the patch will schedule a delayed work and switch/re-writes the
> different sets of value into the events counter. The switching mechanism needs
> to be done for each architecture if it wishes to support this multiplexing scheme.
> Only AMD CPU is supported in this patch.

Should oprofile userspace be updated for this new feature?

Would it make sense for non-AMD CPUs to also implement this?
From: Jason Y. <jas...@am...> - 2008-06-04 13:09:15
Andrew Morton wrote:
> On Tue, 3 Jun 2008 08:44:35 -0500 Jason Yeh <jas...@am...> wrote:
>
>> This is an updated patch to enable Oprofile module to switch between different
>> sets of events at the user specified interval. It allows the module to gather
>> more event statistics than the number of event counters on the hardware in a
>> single run of profiling.
>>
>> A new file (/dev/oprofile/timeout_ms) is added for user to specify the interval.
>> If the number of user specified events is more than the number of events counter
>> on the hardware, the patch will schedule a delayed work and switch/re-writes the
>> different sets of value into the events counter. The switching mechanism needs
>> to be done for each architecture if it wishes to support this multiplexing scheme.
>> Only AMD CPU is supported in this patch.
>
> Should oprofile userspace be updated for this new feature?

Yes. The patches are in the pipeline. I will be sending out the userspace patch for Oprofile today.

> Would it make sense for non-AMD CPUs to also implement this?

I believe it makes sense for non-AMD CPUs to also implement this. Due to hardware limitations, it's out of reach for me to implement it.
From: Andi K. <an...@fi...> - 2008-06-11 12:01:20
Jason Yeh <jas...@am...> writes:

> This is an updated patch to enable Oprofile module to switch between different
> sets of events at the user specified interval. It allows the module to gather
> more event statistics than the number of event counters on the hardware in a
> single run of profiling.
>
> A new file (/dev/oprofile/timeout_ms) is added for user to specify the interval.
> If the number of user specified events is more than the number of events counter
> on the hardware, the patch will schedule a delayed work and switch/re-writes the
> different sets of value into the events counter. The switching mechanism needs
> to be done for each architecture if it wishes to support this multiplexing scheme.
> Only AMD CPU is supported in this patch.

Can you please move the event multiplexing logic (in particular athlon_switch_ctrs)
one level up into the generic oprofile code?

It looks pretty generic to me and there's probably no reason to implement
it for op_model_athlon only. With the current setup any other driver
who wanted to implement it would need to cut'n'paste a lot of code
and in the end all the low level drivers would be much more complicated
than they are today.

Thanks,
-Andi
From: Jason Y. <jas...@am...> - 2008-06-11 13:23:33
Andi Kleen wrote:
>
> Can you please move the event multiplexing logic (in particular athlon_switch_ctrs)
> one level up into the generic oprofile code?
>
> It looks pretty generic to me and there's probably no reason to implement
> it for op_model_athlon only. With the current setup any other driver
> who wanted to implement it would need to cut'n'paste a lot of code
> and in the end all the low level drivers would be much more complicated
> than they are today.

I am not sure the code in op_model_athlon is all that generic. Most of the code
in athlon_switch_ctrs deals with the peculiarities of AMD's performance counter
format. Other than indexing into counter_config, most of it is very similar to
athlon_stop and athlon_setup_ctrs. I will look into this anyway. I agree that if
the code is generic, it should move up one level to avoid more complexity.

Jason
From: Ingo M. <mi...@el...> - 2008-06-16 07:30:09
* Jason Yeh <jas...@am...> wrote:

>> It looks pretty generic to me and there's probably no reason to
>> implement it for op_model_athlon only. With the current setup any
>> other driver who wanted to implement it would need to cut'n'paste a
>> lot of code and in the end all the low level drivers would be much
>> more complicated than they are today.

agreed.

> I am not sure if the code in op_model_athlon are all that generic.
> Most of the codes in athlon_switch_ctrs are dealing with the
> peculiarities of AMD's performance counter format. Other than indexing
> into the counter_config, most of the code are very similar to
> athlon_stop and athlon_setup_ctrs. I will look into this anyway. I
> agree that if the codes are generic, they should be moving one level
> up avoiding more complexities.

yes. Your code in essence virtualizes performance counters and thus removes
most of the visible hardware limitations via time-sharing. There is no reason
why this should be CPU specific: oprofile can already stop/start a particular
counter - how frequently this is done, and for what purpose, is a higher layer
abstraction that the lowlevel code should not be aware of.

Ingo
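To make the layering concrete: stripped of the MSR details, one multiplexing step only needs the hooks the model code already exposes plus a per-CPU set index, which is what the follow-up patch later in this thread implements in nmi_cpu_switch(). The sketch below is a plain userspace mock-up of that sequence; the stub functions and names are illustrative stand-ins, not the kernel API.

/* mpx_cycle.c - illustrative mock-up of a CPU-agnostic counter switch */
#include <stdio.h>

#define NUM_HW_COUNTERS 4	/* physical counters */
#define NUM_COUNTERS   32	/* logical (multiplexed) counters */

static int switch_index;	/* per-CPU in the kernel */
static unsigned long configured[NUM_COUNTERS] = { 1, 1, 1, 1, 1, 1 };

/* stand-ins for the existing model hooks and the new save/restore step */
static void ops_stop(void)		{ puts("stop active hw counters"); }
static void ops_start(void)		{ puts("start hw counters"); }
static void ops_setup_ctrs(void)	{ puts("program event selects for current set"); }
static void save_hw_counts(int set)	{ printf("save counts of set %d\n", set); }
static void restore_hw_counts(int set)	{ printf("restore counts of set %d\n", set); }

/* one multiplexing step, run on every CPU at the chosen interval */
static void cpu_switch(void)
{
	int next = switch_index + NUM_HW_COUNTERS;

	ops_stop();
	save_hw_counts(switch_index);

	if (next >= NUM_COUNTERS || !configured[next])
		next = 0;		/* wrap back to the first event set */
	switch_index = next;

	restore_hw_counts(switch_index);
	ops_setup_ctrs();
	ops_start();
}

int main(void)
{
	cpu_switch();	/* set 0 -> set 1 */
	cpu_switch();	/* set 1 -> wrap back to set 0 */
	return 0;
}

Everything in cpu_switch() is model-independent; only the programming of the event-select registers (the ops_setup_ctrs stand-in here) has to stay in the per-CPU-model code.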
From: Jason Y. <jas...@am...> - 2008-06-18 20:07:15
Hi, This is the response to Andi's request to move up the multiplexing one level. Most of the patches are identical except that the switching logic uses the generic function to stop sampling, re-program counters, and re-start. The "op_x86_model_sepct" has also been modified slightly to contain numbers of hardware counter and the maximun number of counter to multiplex. The "op_msr" has also been modify to store the content of the counter when it is swapped out. Signed-off-by: Jason Yeh <jas...@am...> --- arch/x86/oprofile/nmi_int.c | 100 +++++++++++++++++++++++++++++++++--- arch/x86/oprofile/op_counter.h | 3 - arch/x86/oprofile/op_model_athlon.c | 79 +++++++++++++++++----------- arch/x86/oprofile/op_model_p4.c | 4 + arch/x86/oprofile/op_model_ppro.c | 2 arch/x86/oprofile/op_x86_model.h | 3 + drivers/oprofile/oprof.c | 58 +++++++++++++++++++- drivers/oprofile/oprof.h | 4 + drivers/oprofile/oprofile_files.c | 39 +++++++++++++- include/linux/oprofile.h | 3 + 10 files changed, 249 insertions(+), 46 deletions(-) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cc48d3f..45ae73a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -22,12 +22,18 @@ #include "op_counter.h" #include "op_x86_model.h" +DEFINE_PER_CPU(int, switch_index); + static struct op_x86_model_spec const *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); static int nmi_start(void); static void nmi_stop(void); +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs); +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs); +static void nmi_cpu_stop(void *dummy); +static void nmi_cpu_start(void *dummy); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; @@ -80,6 +86,47 @@ static void exit_sysfs(void) #define exit_sysfs() do { } while (0) #endif /* CONFIG_PM */ +static void nmi_cpu_switch(void *dummy) +{ + int cpu = smp_processor_id(); + int si = per_cpu(switch_index, cpu); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + + nmi_cpu_stop(NULL); + nmi_cpu_save_mpx_registers(msrs); + + /* move to next set */ + si += model->num_hardware_counters; + if ((si > model->num_counters) || (counter_config[si].count == 0)) + per_cpu(switch_index, smp_processor_id()) = 0; + else + per_cpu(switch_index, smp_processor_id()) = si; + + nmi_cpu_restore_mpx_registers(msrs); + model->setup_ctrs(msrs); + nmi_cpu_start(NULL); +} + +/* + * Quick check to see if multiplexing is necessary. + * The check should be sufficient since counters are used + * in ordre. + */ +static int nmi_multiplex_on(void) +{ + return counter_config[model->num_hardware_counters].count ? 
0 : -EINVAL; +} + +static int nmi_switch_event(void) +{ + if (nmi_multiplex_on() < 0) + return -EINVAL; + + on_each_cpu(nmi_cpu_switch, NULL, 0, 1); + + return 0; +} + static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -143,11 +190,10 @@ static void free_msrs(void) static int allocate_msrs(void) { - int success = 1; + int i, success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; - int i; for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, GFP_KERNEL); @@ -155,8 +201,8 @@ static int allocate_msrs(void) success = 0; break; } - per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, - GFP_KERNEL); + per_cpu(cpu_msrs, i).controls = + kmalloc(controls_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).controls) { success = 0; break; @@ -200,7 +246,8 @@ static int nmi_setup(void) return err; } - /* We need to serialize save and setup for HT because the subset + /* + * We need to serialize save and setup for HT because the subset * of msrs are distinct for save and setup operations */ @@ -216,7 +263,6 @@ static int nmi_setup(void) per_cpu(cpu_msrs, 0).controls, sizeof(struct op_msr) * model->num_controls); } - } on_each_cpu(nmi_save_registers, NULL, 0, 1); on_each_cpu(nmi_cpu_setup, NULL, 0, 1); @@ -224,7 +270,41 @@ static int nmi_setup(void) return 0; } -static void nmi_restore_registers(struct op_msrs *msrs) +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) +{ + unsigned int si = __get_cpu_var(switch_index); + unsigned int const nr_ctrs = model->num_hardware_counters; + struct op_msr *counters = &msrs->counters[si]; + unsigned int i; + + for (i = 0; i < nr_ctrs; ++i) { + int offset = i + si; + if (counters[offset].addr) { + rdmsr(counters[offset].addr, + counters[offset].multiplex.low, + counters[offset].multiplex.high); + } + } +} + +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) +{ + unsigned int si = __get_cpu_var(switch_index); + unsigned int const nr_ctrs = model->num_hardware_counters; + struct op_msr *counters = &msrs->counters[si]; + unsigned int i; + + for (i = 0; i < nr_ctrs; ++i) { + int offset = i + si; + if (counters[offset].addr) { + wrmsr(counters[offset].addr, + counters[offset].multiplex.low, + counters[offset].multiplex.high); + } + } +} + +static void nmi_cpu_restore_registers(struct op_msrs *msrs) { unsigned int const nr_ctrs = model->num_counters; unsigned int const nr_ctrls = model->num_controls; @@ -264,7 +344,8 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); - nmi_restore_registers(msrs); + nmi_cpu_restore_registers(msrs); + __get_cpu_var(switch_index) = 0; } static void nmi_shutdown(void) @@ -326,6 +407,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + counter_config[i].save_count_low = 0; } return 0; @@ -449,12 +531,14 @@ int __init op_nmi_init(struct oprofile_operations *ops) init_sysfs(); using_nmi = 1; + __get_cpu_var(switch_index) = 0; ops->create_files = nmi_create_files; ops->setup = nmi_setup; ops->shutdown = nmi_shutdown; ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; + 
ops->switch_events = nmi_switch_event; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 2880b15..786d6e0 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,13 +10,14 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. */ struct op_counter_config { unsigned long count; + unsigned long save_count_low; unsigned long enabled; unsigned long event; unsigned long kernel; diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 3d53487..ea3f226 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -11,6 +11,7 @@ */ #include <linux/oprofile.h> +#include <linux/percpu.h> #include <asm/ptrace.h> #include <asm/msr.h> #include <asm/nmi.h> @@ -18,8 +19,10 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 4 -#define NUM_CONTROLS 4 +#define NUM_COUNTERS 32 +#define NUM_HARDWARE_COUNTERS 4 +#define NUM_CONTROLS 32 +#define NUM_HARDWARE_CONTROLS 4 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) @@ -43,21 +46,24 @@ #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) static unsigned long reset_value[NUM_COUNTERS]; +DECLARE_PER_CPU(int, switch_index); static void athlon_fill_in_addresses(struct op_msrs * const msrs) { int i; for (i = 0; i < NUM_COUNTERS; i++) { - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + int hw_counter = i % NUM_HARDWARE_COUNTERS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + hw_counter)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + hw_counter; else msrs->counters[i].addr = 0; } for (i = 0; i < NUM_CONTROLS; i++) { - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + int hw_control = i % NUM_HARDWARE_CONTROLS; + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + hw_control)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + hw_control; else msrs->controls[i].addr = 0; } @@ -69,8 +75,16 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) unsigned int low, high; int i; + for (i = 0; i < NUM_HARDWARE_CONTROLS; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (counter_config[offset].enabled) + reset_value[offset] = counter_config[offset].count; + else + reset_value[offset] = 0; + } + /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0 ; i < NUM_HARDWARE_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; CTRL_READ(low, high, msrs, i); @@ -80,34 +94,31 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) } /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { - reset_value[i] = counter_config[i].count; - - CTR_WRITE(counter_config[i].count, msrs, i); + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { + int offset = i + __get_cpu_var(switch_index); + if ((counter_config[offset].enabled) && (CTR_IS_RESERVED(msrs, i))) { + CTR_WRITE(counter_config[offset].count, msrs, i); CTRL_READ(low, 
high, msrs, i); CTRL_CLEAR_LO(low); CTRL_CLEAR_HI(high); CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT_LOW(low, counter_config[i].event); - CTRL_SET_EVENT_HIGH(high, counter_config[i].event); + CTRL_SET_USR(low, counter_config[offset].user); + CTRL_SET_KERN(low, counter_config[offset].kernel); + CTRL_SET_UM(low, counter_config[offset].unit_mask); + CTRL_SET_EVENT_LOW(low, counter_config[offset].event); + CTRL_SET_EVENT_HIGH(high, counter_config[offset].event); CTRL_SET_HOST_ONLY(high, 0); CTRL_SET_GUEST_ONLY(high, 0); CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; } } } @@ -119,13 +130,14 @@ static int athlon_check_ctrs(struct pt_regs * const regs, unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (!reset_value[offset]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); + oprofile_add_sample(regs, offset); + CTR_WRITE(reset_value[offset], msrs, i); } } @@ -138,8 +150,10 @@ static void athlon_start(struct op_msrs const * const msrs) { unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (reset_value[i]) { + + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (reset_value[offset]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); CTRL_WRITE(low, high, msrs, i); @@ -155,8 +169,8 @@ static void athlon_stop(struct op_msrs const * const msrs) /* Subtle: stop on all counters to avoid race with * setting our pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (!reset_value[i]) + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); @@ -164,15 +178,16 @@ static void athlon_stop(struct op_msrs const * const msrs) } } + static void athlon_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -181,10 +196,12 @@ static void athlon_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_athlon_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, + .num_hardware_counters = NUM_HARDWARE_COUNTERS, + .num_hardware_controls = NUM_HARDWARE_CONTROLS, .fill_in_addresses = &athlon_fill_in_addresses, .setup_ctrs = &athlon_setup_ctrs, .check_ctrs = &athlon_check_ctrs, .start = &athlon_start, .stop = &athlon_stop, - .shutdown = &athlon_shutdown + .shutdown = &athlon_shutdown, }; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 56b4757..e641545 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -701,6 +701,8 @@ static void p4_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, + .num_hardware_counters = NUM_COUNTERS_HT2, + .num_hardware_controls = NUM_CONTROLS_HT2, .fill_in_addresses = 
&p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -713,6 +715,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = { struct op_x86_model_spec const op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, + .num_hardware_counters = NUM_COUNTERS_NON_HT, + .num_hardware_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index eff431f..e5811aa 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -183,6 +183,8 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_ppro_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, + .num_hardware_counters = NUM_COUNTERS, + .num_hardware_controls = NUM_CONTROLS, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 45b605f..fcfcf10 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -19,6 +19,7 @@ struct op_saved_msr { struct op_msr { unsigned long addr; struct op_saved_msr saved; + struct op_saved_msr multiplex; }; struct op_msrs { @@ -32,6 +33,8 @@ struct pt_regs; * various x86 CPU models' perfctr support. */ struct op_x86_model_spec { + unsigned int const num_hardware_counters; + unsigned int const num_hardware_controls; unsigned int const num_counters; unsigned int const num_controls; void (*fill_in_addresses)(struct op_msrs * const msrs); diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 2c64517..b2fa5df 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -12,6 +12,8 @@ #include <linux/init.h> #include <linux/oprofile.h> #include <linux/moduleparam.h> +#include <linux/workqueue.h> +#include <linux/time.h> #include <asm/mutex.h> #include "oprof.h" @@ -19,13 +21,18 @@ #include "cpu_buffer.h" #include "buffer_sync.h" #include "oprofile_stats.h" + +static unsigned long is_setup; +static void switch_worker(struct work_struct *work); +static DECLARE_DELAYED_WORK(switch_work, switch_worker); +static DEFINE_MUTEX(start_mutex); struct oprofile_operations oprofile_ops; +unsigned long timeout_jiffies; unsigned long oprofile_started; unsigned long backtrace_depth; -static unsigned long is_setup; -static DEFINE_MUTEX(start_mutex); +/* Multiplexing defaults at 1 msec*/ /* timer 0 - use performance monitoring hardware if available @@ -87,6 +94,16 @@ out: return err; } +static void start_switch_worker(void) +{ + schedule_delayed_work(&switch_work, timeout_jiffies); +} + +static void switch_worker(struct work_struct *work) +{ + if (!oprofile_ops.switch_events()) + start_switch_worker(); +} /* Actually start profiling (echo 1>/dev/oprofile/enable) */ int oprofile_start(void) @@ -94,7 +111,6 @@ int oprofile_start(void) int err = -EINVAL; mutex_lock(&start_mutex); - if (!is_setup) goto out; @@ -108,6 +124,9 @@ int oprofile_start(void) if ((err = oprofile_ops.start())) goto out; + if (oprofile_ops.switch_events) + start_switch_worker(); + oprofile_started = 1; out: mutex_unlock(&start_mutex); @@ -123,6 +142,7 @@ void oprofile_stop(void) goto out; oprofile_ops.stop(); oprofile_started = 0; + cancel_delayed_work_sync(&switch_work); /* wake up the daemon to read what remains */ wake_up_buffer_waiter(); out: @@ -155,6 +175,32 @@ 
post_sync: mutex_unlock(&start_mutex); } +/* User inputs in ms, converts to jiffies */ +int oprofile_set_timeout(unsigned long val_msec) +{ + int err = 0; + + mutex_lock(&start_mutex); + + if (oprofile_started) { + err = -EBUSY; + goto out; + } + + if (!oprofile_ops.switch_events) { + err = -EINVAL; + goto out; + } + + timeout_jiffies = msecs_to_jiffies(val_msec); + if (timeout_jiffies == MAX_JIFFY_OFFSET) + timeout_jiffies = msecs_to_jiffies(1); + +out: + mutex_unlock(&start_mutex); + return err; + +} int oprofile_set_backtrace(unsigned long val) { @@ -179,10 +225,16 @@ out: return err; } +static void __init oprofile_switch_timer_init(void) +{ + timeout_jiffies = msecs_to_jiffies(1); +} + static int __init oprofile_init(void) { int err; + oprofile_switch_timer_init(); err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 1832365..c4406a7 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -27,7 +27,8 @@ extern unsigned long fs_buffer_watershed; extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; extern unsigned long backtrace_depth; - +extern unsigned long timeout_jiffies; + struct super_block; struct dentry; @@ -35,5 +36,6 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root); void oprofile_timer_init(struct oprofile_operations * ops); int oprofile_set_backtrace(unsigned long depth); +int oprofile_set_timeout(unsigned long time); #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index ef953ba..cc4f5a1 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -9,6 +9,7 @@ #include <linux/fs.h> #include <linux/oprofile.h> +#include <linux/jiffies.h> #include "event_buffer.h" #include "oprofile_stats.h" @@ -18,6 +19,40 @@ unsigned long fs_buffer_size = 131072; unsigned long fs_cpu_buffer_size = 8192; unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +static ssize_t timeout_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(jiffies_to_msecs(timeout_jiffies), + buf, count, offset); +} + + +static ssize_t timeout_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + retval = oprofile_set_timeout(val); + + if (retval) + return retval; + return count; +} + +static const struct file_operations timeout_fops = { + .read = timeout_read, + .write = timeout_write, +}; + + static ssize_t depth_read(struct file * file, char __user * buf, size_t count, loff_t * offset) { return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); @@ -85,11 +120,10 @@ static ssize_t enable_write(struct file * file, char const __user * buf, size_t if (*offset) return -EINVAL; - retval = oprofilefs_ulong_from_user(&val, buf, count); if (retval) return retval; - + if (val) retval = oprofile_start(); else @@ -129,6 +163,7 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root) oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); + oprofilefs_create_file(sb, root, "timeout_ms", &timeout_fops); oprofile_create_stats_files(sb, root); if 
(oprofile_ops.create_files) oprofile_ops.create_files(sb, root); diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 041bb31..71af056 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -65,6 +65,9 @@ struct oprofile_operations { /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); + + /* Multiplex between different events. Optional. */ + int (*switch_events)(void); /* CPU identification string. */ char * cpu_type; }; |
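One detail of the revised patch that is easy to miss: the new multiplex field in struct op_msr is what keeps a partially-elapsed count alive while its event set is off the hardware. nmi_cpu_save_mpx_registers() reads the live counter into that shadow slot before the set is swapped out, and nmi_cpu_restore_mpx_registers() writes it back when the set returns, so counts keep accumulating across switches instead of being lost. A small userspace simulation of that bookkeeping follows; array reads and writes stand in for rdmsr/wrmsr, and the tick numbers are invented purely for illustration.

/* mpx_shadow.c - illustrative simulation of the multiplex save/restore */
#include <stdio.h>

#define NUM_HW   4	/* physical counters */
#define NUM_SETS 2	/* two multiplexed event sets in this example */

static unsigned long hw[NUM_HW];		/* "live" counter values     */
static unsigned long shadow[NUM_SETS][NUM_HW];	/* op_msr.multiplex stand-in */

static void run_for_a_while(unsigned long ticks)
{
	int i;

	for (i = 0; i < NUM_HW; i++)
		hw[i] += ticks;			/* pretend events were counted */
}

static void switch_set(int from, int to)
{
	int i;

	for (i = 0; i < NUM_HW; i++) {
		shadow[from][i] = hw[i];	/* nmi_cpu_save_mpx_registers    */
		hw[i] = shadow[to][i];		/* nmi_cpu_restore_mpx_registers */
	}
}

int main(void)
{
	run_for_a_while(100);	/* set 0 accumulates 100 */
	switch_set(0, 1);
	run_for_a_while(40);	/* set 1 accumulates 40 */
	switch_set(1, 0);
	run_for_a_while(25);	/* set 0 accumulates 25 more */

	printf("set 0, counter 0: %lu (expected 125)\n", hw[0]);
	printf("set 1, counter 0: %lu (expected 40)\n", shadow[1][0]);
	return 0;
}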
From: Andi K. <an...@fi...> - 2008-06-19 02:52:43
Jason Yeh <jas...@am...> writes:

Looks much better than the previous patch.

> +/*
> + * Quick check to see if multiplexing is necessary.
> + * The check should be sufficient since counters are used
> + * in ordre.
> + */
> +static int nmi_multiplex_on(void)
> +{
> +	return counter_config[model->num_hardware_counters].count ? 0 : -EINVAL;
> +}
> +
> +static int nmi_switch_event(void)
> +{
> +	if (nmi_multiplex_on() < 0)
> +		return -EINVAL;
> +
> +	on_each_cpu(nmi_cpu_switch, NULL, 0, 1);

Wouldn't it be better to use per CPU timers for the switching? That would
lower the overhead especially on larger systems significantly I think. The
"one timer which interrupts all CPUs" model tends to not scale well
and also has latency problems.

Also the naming of the function is weird. It is called nmi_*, but only
runs in a workqueue? Why is the workqueue needed anyways?

-Andi
From: Jason Y. <jas...@am...> - 2008-06-19 18:18:22
Andi Kleen wrote:
> Looks much better than the previous patch.
>
>> +/*
>> + * Quick check to see if multiplexing is necessary.
>> + * The check should be sufficient since counters are used
>> + * in ordre.
>> + */
>> +static int nmi_multiplex_on(void)
>> +{
>> +	return counter_config[model->num_hardware_counters].count ? 0 : -EINVAL;
>> +}
>> +
>> +static int nmi_switch_event(void)
>> +{
>> +	if (nmi_multiplex_on() < 0)
>> +		return -EINVAL;
>> +
>> +	on_each_cpu(nmi_cpu_switch, NULL, 0, 1);
>
> Wouldn't it be better to use per CPU timers for the switching? That would
> lower the overhead especially on larger systems significantly I think. The
> "one timer which interrupts all CPUs" model tends to not scale well
> and also has latency problems.

I will look into this.

> Also the naming of the function is weird. It is called nmi_*, but only
> runs in a workqueue? Why is the workqueue needed anyways?

I used a workqueue as a way to schedule the next switch. I felt that using an
hrtimer is really overkill for event multiplexing. Swapping events at one ms
or below introduces too much overhead.

Jason
From: Andi K. <an...@fi...> - 2008-06-19 18:32:35
>>
>> Also the naming of the function is weird. It is called nmi_*, but only
>> runs in a workqueue? Why is the workqueue needed anyways?
>
> I used workqueue as a way to schedule the next interrupt. I felt that
> using the
> hrtimer is really an overkill for the events multiplexing. Swapping
> events at
> one ms or below introduces to much overhead.

Why not give the user the choice what frequency they want?

-Andi
From: Jason Y. <jas...@am...> - 2008-06-19 18:38:53
Andi Kleen wrote:
>>> Also the naming of the function is weird. It is called nmi_*, but only
>>> runs in a workqueue? Why is the workqueue needed anyways?
>> I used workqueue as a way to schedule the next interrupt. I felt that
>> using the
>> hrtimer is really an overkill for the events multiplexing. Swapping
>> events at
>> one ms or below introduces to much overhead.
>
> Why not give the user the choice what frequency they want?

Yeah. I will give them a way to shoot themselves in the foot :) I will look
into using an hrtimer in the next version unless someone feels strongly
against it.

Jason
From: Andi K. <an...@fi...> - 2008-06-19 18:45:33
Jason Yeh wrote:
> Andi Kleen wrote:
>>>> Also the naming of the function is weird. It is called nmi_*, but only
>>>> runs in a workqueue? Why is the workqueue needed anyways?
>>> I used workqueue as a way to schedule the next interrupt. I felt that
>>> using the
>>> hrtimer is really an overkill for the events multiplexing. Swapping
>>> events at
>>> one ms or below introduces to much overhead.
>>
>> Why not give the user the choice what frequency they want?
>>
>
> Yeah. I will give them a way to shoot themselves on their foot :)

Well it's pretty easy using oprofile anyways.

> I will look into using hrtimer in the next version unless someone
> feels strongly against it.

My main point was not using an hrtimer, but using per-CPU timers. That's not
necessarily the same thing (both come in both flavours).

-Andi
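For reference, one way to read the per-CPU timer suggestion without going all the way to hrtimers is to keep the delayed-work approach from the patch but give every CPU its own work item, so each CPU reschedules itself instead of all CPUs being hit from one global timer via on_each_cpu(). The fragment below is only an illustrative sketch of that idea, not part of any posted patch: the mpx_* names are invented, nmi_cpu_switch() is borrowed from the posted patch (and would need to be made non-static), and CPU hotplug and error handling are ignored.

/* Illustrative only: per-CPU delayed work driving the counter switch. */
#include <linux/workqueue.h>
#include <linux/percpu.h>
#include <linux/cpu.h>

static DEFINE_PER_CPU(struct delayed_work, mpx_work);
extern unsigned long timeout_jiffies;		/* from oprof.c in the patch   */
extern void nmi_cpu_switch(void *dummy);	/* from nmi_int.c in the patch */

static void mpx_worker(struct work_struct *work)
{
	/* runs on the CPU it was queued on; switch only this CPU's set */
	nmi_cpu_switch(NULL);
	schedule_delayed_work_on(smp_processor_id(),
				 &__get_cpu_var(mpx_work), timeout_jiffies);
}

static void mpx_start_all(void)
{
	int cpu;

	for_each_online_cpu(cpu) {
		INIT_DELAYED_WORK(&per_cpu(mpx_work, cpu), mpx_worker);
		schedule_delayed_work_on(cpu, &per_cpu(mpx_work, cpu),
					 timeout_jiffies);
	}
}

static void mpx_stop_all(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		cancel_delayed_work_sync(&per_cpu(mpx_work, cpu));
}

Whether this is worth it compared to the single delayed work plus on_each_cpu() in the posted patch depends mainly on machine size and how much the IPI latency matters, which is exactly the trade-off being discussed here.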
From: Robert R. <rob...@am...> - 2008-07-15 16:48:03
Jason, please repost your final patch version for review. I would like to prepare the patch for 2.6.27. Thanks, -Robert On 18.06.08 15:07:04, Jason Yeh wrote: > Hi, > > This is the response to Andi's request to move up the multiplexing one level. > Most of the patches are identical except that the switching logic uses the > generic function to stop sampling, re-program counters, and re-start. The > "op_x86_model_sepct" has also been modified slightly to contain numbers of > hardware counter and the maximun number of counter to multiplex. The "op_msr" > has also been modify to store the content of the counter when it is swapped out. > > > Signed-off-by: Jason Yeh <jas...@am...> > > --- > > arch/x86/oprofile/nmi_int.c | 100 +++++++++++++++++++++++++++++++++--- > arch/x86/oprofile/op_counter.h | 3 - > arch/x86/oprofile/op_model_athlon.c | 79 +++++++++++++++++----------- > arch/x86/oprofile/op_model_p4.c | 4 + > arch/x86/oprofile/op_model_ppro.c | 2 > arch/x86/oprofile/op_x86_model.h | 3 + > drivers/oprofile/oprof.c | 58 +++++++++++++++++++- > drivers/oprofile/oprof.h | 4 + > drivers/oprofile/oprofile_files.c | 39 +++++++++++++- > include/linux/oprofile.h | 3 + > 10 files changed, 249 insertions(+), 46 deletions(-) > > > diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c > index cc48d3f..45ae73a 100644 > --- a/arch/x86/oprofile/nmi_int.c > +++ b/arch/x86/oprofile/nmi_int.c > @@ -22,12 +22,18 @@ > #include "op_counter.h" > #include "op_x86_model.h" > > +DEFINE_PER_CPU(int, switch_index); > + > static struct op_x86_model_spec const *model; > static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); > static DEFINE_PER_CPU(unsigned long, saved_lvtpc); > > static int nmi_start(void); > static void nmi_stop(void); > +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs); > +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs); > +static void nmi_cpu_stop(void *dummy); > +static void nmi_cpu_start(void *dummy); > > /* 0 == registered but off, 1 == registered and on */ > static int nmi_enabled = 0; > @@ -80,6 +86,47 @@ static void exit_sysfs(void) > #define exit_sysfs() do { } while (0) > #endif /* CONFIG_PM */ > > +static void nmi_cpu_switch(void *dummy) > +{ > + int cpu = smp_processor_id(); > + int si = per_cpu(switch_index, cpu); > + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); > + > + nmi_cpu_stop(NULL); > + nmi_cpu_save_mpx_registers(msrs); > + > + /* move to next set */ > + si += model->num_hardware_counters; > + if ((si > model->num_counters) || (counter_config[si].count == 0)) > + per_cpu(switch_index, smp_processor_id()) = 0; > + else > + per_cpu(switch_index, smp_processor_id()) = si; > + > + nmi_cpu_restore_mpx_registers(msrs); > + model->setup_ctrs(msrs); > + nmi_cpu_start(NULL); > +} > + > +/* > + * Quick check to see if multiplexing is necessary. > + * The check should be sufficient since counters are used > + * in ordre. > + */ > +static int nmi_multiplex_on(void) > +{ > + return counter_config[model->num_hardware_counters].count ? 
0 : -EINVAL; > +} > + > +static int nmi_switch_event(void) > +{ > + if (nmi_multiplex_on() < 0) > + return -EINVAL; > + > + on_each_cpu(nmi_cpu_switch, NULL, 0, 1); > + > + return 0; > +} > + > static int profile_exceptions_notify(struct notifier_block *self, > unsigned long val, void *data) > { > @@ -143,11 +190,10 @@ static void free_msrs(void) > > static int allocate_msrs(void) > { > - int success = 1; > + int i, success = 1; > size_t controls_size = sizeof(struct op_msr) * model->num_controls; > size_t counters_size = sizeof(struct op_msr) * model->num_counters; > > - int i; > for_each_possible_cpu(i) { > per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, > GFP_KERNEL); > @@ -155,8 +201,8 @@ static int allocate_msrs(void) > success = 0; > break; > } > - per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, > - GFP_KERNEL); > + per_cpu(cpu_msrs, i).controls = > + kmalloc(controls_size, GFP_KERNEL); > if (!per_cpu(cpu_msrs, i).controls) { > success = 0; > break; > @@ -200,7 +246,8 @@ static int nmi_setup(void) > return err; > } > > - /* We need to serialize save and setup for HT because the subset > + /* > + * We need to serialize save and setup for HT because the subset > * of msrs are distinct for save and setup operations > */ > > @@ -216,7 +263,6 @@ static int nmi_setup(void) > per_cpu(cpu_msrs, 0).controls, > sizeof(struct op_msr) * model->num_controls); > } > - > } > on_each_cpu(nmi_save_registers, NULL, 0, 1); > on_each_cpu(nmi_cpu_setup, NULL, 0, 1); > @@ -224,7 +270,41 @@ static int nmi_setup(void) > return 0; > } > > -static void nmi_restore_registers(struct op_msrs *msrs) > +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) > +{ > + unsigned int si = __get_cpu_var(switch_index); > + unsigned int const nr_ctrs = model->num_hardware_counters; > + struct op_msr *counters = &msrs->counters[si]; > + unsigned int i; > + > + for (i = 0; i < nr_ctrs; ++i) { > + int offset = i + si; > + if (counters[offset].addr) { > + rdmsr(counters[offset].addr, > + counters[offset].multiplex.low, > + counters[offset].multiplex.high); > + } > + } > +} > + > +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) > +{ > + unsigned int si = __get_cpu_var(switch_index); > + unsigned int const nr_ctrs = model->num_hardware_counters; > + struct op_msr *counters = &msrs->counters[si]; > + unsigned int i; > + > + for (i = 0; i < nr_ctrs; ++i) { > + int offset = i + si; > + if (counters[offset].addr) { > + wrmsr(counters[offset].addr, > + counters[offset].multiplex.low, > + counters[offset].multiplex.high); > + } > + } > +} > + > +static void nmi_cpu_restore_registers(struct op_msrs *msrs) > { > unsigned int const nr_ctrs = model->num_counters; > unsigned int const nr_ctrls = model->num_controls; > @@ -264,7 +344,8 @@ static void nmi_cpu_shutdown(void *dummy) > apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); > apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); > apic_write(APIC_LVTERR, v); > - nmi_restore_registers(msrs); > + nmi_cpu_restore_registers(msrs); > + __get_cpu_var(switch_index) = 0; > } > > static void nmi_shutdown(void) > @@ -326,6 +407,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) > oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); > oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); > oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); > + counter_config[i].save_count_low = 0; > } > > return 0; > @@ -449,12 +531,14 @@ int __init op_nmi_init(struct oprofile_operations *ops) > > 
init_sysfs(); > using_nmi = 1; > + __get_cpu_var(switch_index) = 0; > ops->create_files = nmi_create_files; > ops->setup = nmi_setup; > ops->shutdown = nmi_shutdown; > ops->start = nmi_start; > ops->stop = nmi_stop; > ops->cpu_type = cpu_type; > + ops->switch_events = nmi_switch_event; > printk(KERN_INFO "oprofile: using NMI interrupt.\n"); > return 0; > } > diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h > index 2880b15..786d6e0 100644 > --- a/arch/x86/oprofile/op_counter.h > +++ b/arch/x86/oprofile/op_counter.h > @@ -10,13 +10,14 @@ > #ifndef OP_COUNTER_H > #define OP_COUNTER_H > > -#define OP_MAX_COUNTER 8 > +#define OP_MAX_COUNTER 32 > > /* Per-perfctr configuration as set via > * oprofilefs. > */ > struct op_counter_config { > unsigned long count; > + unsigned long save_count_low; > unsigned long enabled; > unsigned long event; > unsigned long kernel; > diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c > index 3d53487..ea3f226 100644 > --- a/arch/x86/oprofile/op_model_athlon.c > +++ b/arch/x86/oprofile/op_model_athlon.c > @@ -11,6 +11,7 @@ > */ > > #include <linux/oprofile.h> > +#include <linux/percpu.h> > #include <asm/ptrace.h> > #include <asm/msr.h> > #include <asm/nmi.h> > @@ -18,8 +19,10 @@ > #include "op_x86_model.h" > #include "op_counter.h" > > -#define NUM_COUNTERS 4 > -#define NUM_CONTROLS 4 > +#define NUM_COUNTERS 32 > +#define NUM_HARDWARE_COUNTERS 4 > +#define NUM_CONTROLS 32 > +#define NUM_HARDWARE_CONTROLS 4 > > #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) > #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) > @@ -43,21 +46,24 @@ > #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) > > static unsigned long reset_value[NUM_COUNTERS]; > +DECLARE_PER_CPU(int, switch_index); > > static void athlon_fill_in_addresses(struct op_msrs * const msrs) > { > int i; > > for (i = 0; i < NUM_COUNTERS; i++) { > - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) > - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; > + int hw_counter = i % NUM_HARDWARE_COUNTERS; > + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + hw_counter)) > + msrs->counters[i].addr = MSR_K7_PERFCTR0 + hw_counter; > else > msrs->counters[i].addr = 0; > } > > for (i = 0; i < NUM_CONTROLS; i++) { > - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) > - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; > + int hw_control = i % NUM_HARDWARE_CONTROLS; > + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + hw_control)) > + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + hw_control; > else > msrs->controls[i].addr = 0; > } > @@ -69,8 +75,16 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) > unsigned int low, high; > int i; > > + for (i = 0; i < NUM_HARDWARE_CONTROLS; ++i) { > + int offset = i + __get_cpu_var(switch_index); > + if (counter_config[offset].enabled) > + reset_value[offset] = counter_config[offset].count; > + else > + reset_value[offset] = 0; > + } > + > /* clear all counters */ > - for (i = 0 ; i < NUM_CONTROLS; ++i) { > + for (i = 0 ; i < NUM_HARDWARE_CONTROLS; ++i) { > if (unlikely(!CTRL_IS_RESERVED(msrs, i))) > continue; > CTRL_READ(low, high, msrs, i); > @@ -80,34 +94,31 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) > } > > /* avoid a false detection of ctr overflows in NMI handler */ > - for (i = 0; i < NUM_COUNTERS; ++i) { > + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { > if (unlikely(!CTR_IS_RESERVED(msrs, i))) > continue; > CTR_WRITE(1, msrs, i); > } > > /* 
>         /* enable active counters */
> -        for (i = 0; i < NUM_COUNTERS; ++i) {
> -                if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
> -                        reset_value[i] = counter_config[i].count;
> -
> -                        CTR_WRITE(counter_config[i].count, msrs, i);
> +        for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) {
> +                int offset = i + __get_cpu_var(switch_index);
> +                if ((counter_config[offset].enabled) && (CTR_IS_RESERVED(msrs, i))) {
> +                        CTR_WRITE(counter_config[offset].count, msrs, i);
>
>                         CTRL_READ(low, high, msrs, i);
>                         CTRL_CLEAR_LO(low);
>                         CTRL_CLEAR_HI(high);
>                         CTRL_SET_ENABLE(low);
> -                        CTRL_SET_USR(low, counter_config[i].user);
> -                        CTRL_SET_KERN(low, counter_config[i].kernel);
> -                        CTRL_SET_UM(low, counter_config[i].unit_mask);
> -                        CTRL_SET_EVENT_LOW(low, counter_config[i].event);
> -                        CTRL_SET_EVENT_HIGH(high, counter_config[i].event);
> +                        CTRL_SET_USR(low, counter_config[offset].user);
> +                        CTRL_SET_KERN(low, counter_config[offset].kernel);
> +                        CTRL_SET_UM(low, counter_config[offset].unit_mask);
> +                        CTRL_SET_EVENT_LOW(low, counter_config[offset].event);
> +                        CTRL_SET_EVENT_HIGH(high, counter_config[offset].event);
>                         CTRL_SET_HOST_ONLY(high, 0);
>                         CTRL_SET_GUEST_ONLY(high, 0);
>
>                         CTRL_WRITE(low, high, msrs, i);
> -                } else {
> -                        reset_value[i] = 0;
>                 }
>         }
> }
> @@ -119,13 +130,14 @@ static int athlon_check_ctrs(struct pt_regs * const regs,
>         unsigned int low, high;
>         int i;
>
> -        for (i = 0 ; i < NUM_COUNTERS; ++i) {
> -                if (!reset_value[i])
> +        for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) {
> +                int offset = i + __get_cpu_var(switch_index);
> +                if (!reset_value[offset])
>                         continue;
>                 CTR_READ(low, high, msrs, i);
>                 if (CTR_OVERFLOWED(low)) {
> -                        oprofile_add_sample(regs, i);
> -                        CTR_WRITE(reset_value[i], msrs, i);
> +                        oprofile_add_sample(regs, offset);
> +                        CTR_WRITE(reset_value[offset], msrs, i);
>                 }
>         }
>
> @@ -138,8 +150,10 @@ static void athlon_start(struct op_msrs const * const msrs)
> {
>         unsigned int low, high;
>         int i;
> -        for (i = 0 ; i < NUM_COUNTERS ; ++i) {
> -                if (reset_value[i]) {
> +
> +        for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) {
> +                int offset = i + __get_cpu_var(switch_index);
> +                if (reset_value[offset]) {
>                         CTRL_READ(low, high, msrs, i);
>                         CTRL_SET_ACTIVE(low);
>                         CTRL_WRITE(low, high, msrs, i);
> @@ -155,8 +169,8 @@ static void athlon_stop(struct op_msrs const * const msrs)
>
>         /* Subtle: stop on all counters to avoid race with
>          * setting our pm callback */
> -        for (i = 0 ; i < NUM_COUNTERS ; ++i) {
> -                if (!reset_value[i])
> +        for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) {
> +                if (!reset_value[i + per_cpu(switch_index, smp_processor_id())])
>                         continue;
>                 CTRL_READ(low, high, msrs, i);
>                 CTRL_SET_INACTIVE(low);
> @@ -164,15 +178,16 @@ static void athlon_stop(struct op_msrs const * const msrs)
>         }
> }
>
> +
> static void athlon_shutdown(struct op_msrs const * const msrs)
> {
>         int i;
>
> -        for (i = 0 ; i < NUM_COUNTERS ; ++i) {
> +        for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) {
>                 if (CTR_IS_RESERVED(msrs, i))
>                         release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
>         }
> -        for (i = 0 ; i < NUM_CONTROLS ; ++i) {
> +        for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) {
>                 if (CTRL_IS_RESERVED(msrs, i))
>                         release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
>         }
> @@ -181,10 +196,12 @@ static void athlon_shutdown(struct op_msrs const * const msrs)
> struct op_x86_model_spec const op_athlon_spec = {
>         .num_counters = NUM_COUNTERS,
>         .num_controls = NUM_CONTROLS,
> +        .num_hardware_counters = NUM_HARDWARE_COUNTERS,
> +        .num_hardware_controls = NUM_HARDWARE_CONTROLS,
>         .fill_in_addresses = &athlon_fill_in_addresses,
>         .setup_ctrs = &athlon_setup_ctrs,
>         .check_ctrs = &athlon_check_ctrs,
>         .start = &athlon_start,
>         .stop = &athlon_stop,
> -        .shutdown = &athlon_shutdown
> +        .shutdown = &athlon_shutdown,
> };
> diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
> index 56b4757..e641545 100644
> --- a/arch/x86/oprofile/op_model_p4.c
> +++ b/arch/x86/oprofile/op_model_p4.c
> @@ -701,6 +701,8 @@ static void p4_shutdown(struct op_msrs const * const msrs)
> struct op_x86_model_spec const op_p4_ht2_spec = {
>         .num_counters = NUM_COUNTERS_HT2,
>         .num_controls = NUM_CONTROLS_HT2,
> +        .num_hardware_counters = NUM_COUNTERS_HT2,
> +        .num_hardware_controls = NUM_CONTROLS_HT2,
>         .fill_in_addresses = &p4_fill_in_addresses,
>         .setup_ctrs = &p4_setup_ctrs,
>         .check_ctrs = &p4_check_ctrs,
> @@ -713,6 +715,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = {
> struct op_x86_model_spec const op_p4_spec = {
>         .num_counters = NUM_COUNTERS_NON_HT,
>         .num_controls = NUM_CONTROLS_NON_HT,
> +        .num_hardware_counters = NUM_COUNTERS_NON_HT,
> +        .num_hardware_controls = NUM_CONTROLS_NON_HT,
>         .fill_in_addresses = &p4_fill_in_addresses,
>         .setup_ctrs = &p4_setup_ctrs,
>         .check_ctrs = &p4_check_ctrs,
> diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
> index eff431f..e5811aa 100644
> --- a/arch/x86/oprofile/op_model_ppro.c
> +++ b/arch/x86/oprofile/op_model_ppro.c
> @@ -183,6 +183,8 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
> struct op_x86_model_spec const op_ppro_spec = {
>         .num_counters = NUM_COUNTERS,
>         .num_controls = NUM_CONTROLS,
> +        .num_hardware_counters = NUM_COUNTERS,
> +        .num_hardware_controls = NUM_CONTROLS,
>         .fill_in_addresses = &ppro_fill_in_addresses,
>         .setup_ctrs = &ppro_setup_ctrs,
>         .check_ctrs = &ppro_check_ctrs,
> diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
> index 45b605f..fcfcf10 100644
> --- a/arch/x86/oprofile/op_x86_model.h
> +++ b/arch/x86/oprofile/op_x86_model.h
> @@ -19,6 +19,7 @@ struct op_saved_msr {
> struct op_msr {
>         unsigned long addr;
>         struct op_saved_msr saved;
> +        struct op_saved_msr multiplex;
> };
>
> struct op_msrs {
> @@ -32,6 +33,8 @@ struct pt_regs;
>  * various x86 CPU models' perfctr support.
>  */
> struct op_x86_model_spec {
> +        unsigned int const num_hardware_counters;
> +        unsigned int const num_hardware_controls;
>         unsigned int const num_counters;
>         unsigned int const num_controls;
>         void (*fill_in_addresses)(struct op_msrs * const msrs);
> diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c
> index 2c64517..b2fa5df 100644
> --- a/drivers/oprofile/oprof.c
> +++ b/drivers/oprofile/oprof.c
> @@ -12,6 +12,8 @@
> #include <linux/init.h>
> #include <linux/oprofile.h>
> #include <linux/moduleparam.h>
> +#include <linux/workqueue.h>
> +#include <linux/time.h>
> #include <asm/mutex.h>
>
> #include "oprof.h"
> @@ -19,13 +21,18 @@
> #include "cpu_buffer.h"
> #include "buffer_sync.h"
> #include "oprofile_stats.h"
> +
> +static unsigned long is_setup;
> +static void switch_worker(struct work_struct *work);
> +static DECLARE_DELAYED_WORK(switch_work, switch_worker);
> +static DEFINE_MUTEX(start_mutex);
>
> struct oprofile_operations oprofile_ops;
>
> +unsigned long timeout_jiffies;
> unsigned long oprofile_started;
> unsigned long backtrace_depth;
> -static unsigned long is_setup;
> -static DEFINE_MUTEX(start_mutex);
> +/* Multiplexing defaults at 1 msec*/
>
> /* timer
>    0 - use performance monitoring hardware if available
> @@ -87,6 +94,16 @@ out:
>         return err;
> }
>
> +static void start_switch_worker(void)
> +{
> +        schedule_delayed_work(&switch_work, timeout_jiffies);
> +}
> +
> +static void switch_worker(struct work_struct *work)
> +{
> +        if (!oprofile_ops.switch_events())
> +                start_switch_worker();
> +}
>
> /* Actually start profiling (echo 1>/dev/oprofile/enable) */
> int oprofile_start(void)
> @@ -94,7 +111,6 @@ int oprofile_start(void)
>         int err = -EINVAL;
>
>         mutex_lock(&start_mutex);
> -
>         if (!is_setup)
>                 goto out;
>
> @@ -108,6 +124,9 @@ int oprofile_start(void)
>         if ((err = oprofile_ops.start()))
>                 goto out;
>
> +        if (oprofile_ops.switch_events)
> +                start_switch_worker();
> +
>         oprofile_started = 1;
> out:
>         mutex_unlock(&start_mutex);
> @@ -123,6 +142,7 @@ void oprofile_stop(void)
>                 goto out;
>         oprofile_ops.stop();
>         oprofile_started = 0;
> +        cancel_delayed_work_sync(&switch_work);
>         /* wake up the daemon to read what remains */
>         wake_up_buffer_waiter();
> out:
> @@ -155,6 +175,32 @@ post_sync:
>         mutex_unlock(&start_mutex);
> }
>
> +/* User inputs in ms, converts to jiffies */
> +int oprofile_set_timeout(unsigned long val_msec)
> +{
> +        int err = 0;
> +
> +        mutex_lock(&start_mutex);
> +
> +        if (oprofile_started) {
> +                err = -EBUSY;
> +                goto out;
> +        }
> +
> +        if (!oprofile_ops.switch_events) {
> +                err = -EINVAL;
> +                goto out;
> +        }
> +
> +        timeout_jiffies = msecs_to_jiffies(val_msec);
> +        if (timeout_jiffies == MAX_JIFFY_OFFSET)
> +                timeout_jiffies = msecs_to_jiffies(1);
> +
> +out:
> +        mutex_unlock(&start_mutex);
> +        return err;
> +
> +}
>
> int oprofile_set_backtrace(unsigned long val)
> {
> @@ -179,10 +225,16 @@ out:
>         return err;
> }
>
> +static void __init oprofile_switch_timer_init(void)
> +{
> +        timeout_jiffies = msecs_to_jiffies(1);
> +}
> +
> static int __init oprofile_init(void)
> {
>         int err;
>
> +        oprofile_switch_timer_init();
>         err = oprofile_arch_init(&oprofile_ops);
>
>         if (err < 0 || timer) {
> diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h
> index 1832365..c4406a7 100644
> --- a/drivers/oprofile/oprof.h
> +++ b/drivers/oprofile/oprof.h
> @@ -27,7 +27,8 @@ extern unsigned long fs_buffer_watershed;
> extern struct oprofile_operations oprofile_ops;
> extern unsigned long oprofile_started;
> extern unsigned long backtrace_depth;
> -
> +extern unsigned long timeout_jiffies;
> +
> struct super_block;
> struct dentry;
>
> @@ -35,5 +36,6 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root);
> void oprofile_timer_init(struct oprofile_operations * ops);
>
> int oprofile_set_backtrace(unsigned long depth);
> +int oprofile_set_timeout(unsigned long time);
>
> #endif /* OPROF_H */
> diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c
> index ef953ba..cc4f5a1 100644
> --- a/drivers/oprofile/oprofile_files.c
> +++ b/drivers/oprofile/oprofile_files.c
> @@ -9,6 +9,7 @@
>
> #include <linux/fs.h>
> #include <linux/oprofile.h>
> +#include <linux/jiffies.h>
>
> #include "event_buffer.h"
> #include "oprofile_stats.h"
> @@ -18,6 +19,40 @@ unsigned long fs_buffer_size = 131072;
> unsigned long fs_cpu_buffer_size = 8192;
> unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
>
> +static ssize_t timeout_read(struct file *file, char __user *buf,
> +                size_t count, loff_t *offset)
> +{
> +        return oprofilefs_ulong_to_user(jiffies_to_msecs(timeout_jiffies),
> +                                        buf, count, offset);
> +}
> +
> +
> +static ssize_t timeout_write(struct file *file, char const __user *buf,
> +                size_t count, loff_t *offset)
> +{
> +        unsigned long val;
> +        int retval;
> +
> +        if (*offset)
> +                return -EINVAL;
> +
> +        retval = oprofilefs_ulong_from_user(&val, buf, count);
> +        if (retval)
> +                return retval;
> +
> +        retval = oprofile_set_timeout(val);
> +
> +        if (retval)
> +                return retval;
> +        return count;
> +}
> +
> +static const struct file_operations timeout_fops = {
> +        .read = timeout_read,
> +        .write = timeout_write,
> +};
> +
> +
> static ssize_t depth_read(struct file * file, char __user * buf, size_t count, loff_t * offset)
> {
>         return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset);
> @@ -85,11 +120,10 @@ static ssize_t enable_write(struct file * file, char const __user * buf, size_t
>
>         if (*offset)
>                 return -EINVAL;
> -
>         retval = oprofilefs_ulong_from_user(&val, buf, count);
>         if (retval)
>                 return retval;
> -
> +
>         if (val)
>                 retval = oprofile_start();
>         else
> @@ -129,6 +163,7 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root)
>         oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops);
>         oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops);
>         oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops);
> +        oprofilefs_create_file(sb, root, "timeout_ms", &timeout_fops);
>         oprofile_create_stats_files(sb, root);
>         if (oprofile_ops.create_files)
>                 oprofile_ops.create_files(sb, root);
> diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h
> index 041bb31..71af056 100644
> --- a/include/linux/oprofile.h
> +++ b/include/linux/oprofile.h
> @@ -65,6 +65,9 @@ struct oprofile_operations {
>
>         /* Initiate a stack backtrace. Optional. */
>         void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
> +
> +        /* Multiplex between different events. Optional. */
> +        int (*switch_events)(void);
>         /* CPU identification string. */
>         char * cpu_type;
> };
>
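The hunks above add the per-CPU save/restore helpers and the switch_events hook, but the body that actually rotates switch_index on each CPU is not visible in this excerpt. Purely for illustration, and assuming a straightforward round-robin over groups of num_hardware_counters events, the per-CPU switch step could look roughly like the sketch below; the function name nmi_cpu_switch_sketch and the wrap-around test are assumptions, not code from the patch:

static void nmi_cpu_switch_sketch(void *dummy)
{
        struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
        int si = __get_cpu_var(switch_index);

        /* park the counter values of the group that was just counting */
        nmi_cpu_save_mpx_registers(msrs);

        /*
         * Advance to the next group of hardware counters, wrapping back to
         * the first group when no further event set is configured (assumed
         * policy, not taken from the patch).
         */
        si += model->num_hardware_counters;
        if (si >= model->num_counters || !counter_config[si].count)
                si = 0;
        __get_cpu_var(switch_index) = si;

        /*
         * Reprogram the event selects for the new group and resume it from
         * the counter values saved on its previous turn.
         */
        model->setup_ctrs(msrs);
        nmi_cpu_restore_mpx_registers(msrs);
}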
-- 
Advanced Micro Devices, Inc.
Operating System Research Center
email: rob...@am...
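For completeness, a small userspace illustration of the new interface, assuming oprofilefs is mounted at /dev/oprofile as usual: the interval is written in milliseconds to timeout_ms, and because oprofile_set_timeout() returns -EBUSY once profiling is running, the value has to be set before enabling the profiler. This is only a sketch, not a tool shipped with the patch.

#include <stdio.h>

int main(void)
{
        /* set the multiplexing interval to 5 ms before profiling starts */
        FILE *f = fopen("/dev/oprofile/timeout_ms", "w");

        if (!f) {
                perror("/dev/oprofile/timeout_ms");
                return 1;
        }
        fprintf(f, "5\n");
        fclose(f);
        return 0;
}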