From: Robert R. <rob...@am...> - 2011-10-17 14:35:37
|
This patch series fixes and updates hr and nmi timer modes of oprofile. I got bug reports of crashes while unloading the oprofile module. There are two fixes that address this. The fixes are also for linux-stable. Another patch reworks the nmi timer mode. This was neccessary due to the removal of the x86 watchdog. Now nmi timer mode uses perf event to setup the nmi tick source. The other patches improve testability and ease exit code. After review I will apply the patches to the oprofile tree. Thanks, -Robert The following changes since commit 910e94dd0cc5abacebf0bd5ffd859f61b9583857: Merge branch 'tip/perf/core' of git://github.com/rostedt/linux into perf/core (2011-10-12 17:14:47 +0200) Robert Richter (5): oprofile, x86: Add kernel parameter oprofile.cpu_type=timer oprofile: Fix crash when unloading module (hr timer mode) oprofile, x86: Fix crash when unloading module (nmi timer mode) oprofile: Remove exit function for timer mode oprofile, x86: Reimplement nmi timer mode using perf event Documentation/kernel-parameters.txt | 3 + arch/Kconfig | 4 + arch/x86/oprofile/Makefile | 3 +- arch/x86/oprofile/init.c | 7 +- arch/x86/oprofile/nmi_int.c | 27 ++++- arch/x86/oprofile/nmi_timer_int.c | 66 ------------- drivers/oprofile/nmi_timer_int.c | 172 +++++++++++++++++++++++++++++++++++ drivers/oprofile/oprof.c | 37 ++++++-- drivers/oprofile/oprof.h | 9 ++ drivers/oprofile/timer_int.c | 29 +++--- kernel/events/core.c | 2 + 11 files changed, 261 insertions(+), 98 deletions(-) delete mode 100644 arch/x86/oprofile/nmi_timer_int.c create mode 100644 drivers/oprofile/nmi_timer_int.c |
From: Robert R. <rob...@am...> - 2011-10-17 14:35:35
|
The legacy x86 nmi watchdog code was removed with the implementation of the perf based nmi watchdog. This broke Oprofile's nmi timer mode. To run nmi timer mode we relied on a continuous ticking nmi source which the nmi watchdog provided. The nmi tick was no longer available and current watchdog can not be used anymore since it runs with very long periods in the range of seconds. This patch reimplements the nmi timer mode using a perf counter nmi source. Cc: Peter Zijlstra <a.p...@ch...> Signed-off-by: Robert Richter <rob...@am...> --- arch/Kconfig | 4 + arch/x86/oprofile/Makefile | 3 +- arch/x86/oprofile/nmi_timer_int.c | 66 -------------- drivers/oprofile/nmi_timer_int.c | 172 +++++++++++++++++++++++++++++++++++++ drivers/oprofile/oprof.c | 14 ++- drivers/oprofile/oprof.h | 9 ++ kernel/events/core.c | 2 + 7 files changed, 198 insertions(+), 72 deletions(-) delete mode 100644 arch/x86/oprofile/nmi_timer_int.c create mode 100644 drivers/oprofile/nmi_timer_int.c diff --git a/arch/Kconfig b/arch/Kconfig index 4b0669c..f01f75c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX config HAVE_OPROFILE bool +config OPROFILE_NMI_TIMER + def_bool y + def_bool PERF_EVENTS && HAVE_PERF_EVENTS_NMI + config KPROBES bool "Kprobes" depends on MODULES diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 446902b..1599f56 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile @@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprof.o cpu_buffer.o buffer_sync.o \ event_buffer.o oprofile_files.o \ oprofilefs.o oprofile_stats.o \ - timer_int.o ) + timer_int.o nmi_timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o backtrace.o oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ op_model_ppro.o op_model_p4.o -oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c deleted file mode 100644 index 720bf5a..0000000 --- a/arch/x86/oprofile/nmi_timer_int.c +++ /dev/null @@ -1,66 +0,0 @@ -/** - * @file nmi_timer_int.c - * - * @remark Copyright 2003 OProfile authors - * @remark Read the file COPYING - * - * @author Zwane Mwaikambo <zw...@li...> - */ - -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/errno.h> -#include <linux/oprofile.h> -#include <linux/rcupdate.h> -#include <linux/kdebug.h> - -#include <asm/nmi.h> -#include <asm/apic.h> -#include <asm/ptrace.h> - -static int profile_timer_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch (val) { - case DIE_NMI: - oprofile_add_sample(args->regs, 0); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - -static struct notifier_block profile_timer_exceptions_nb = { - .notifier_call = profile_timer_exceptions_notify, - .next = NULL, - .priority = NMI_LOW_PRIOR, -}; - -static int timer_start(void) -{ - if (register_die_notifier(&profile_timer_exceptions_nb)) - return 1; - return 0; -} - - -static void timer_stop(void) -{ - unregister_die_notifier(&profile_timer_exceptions_nb); - synchronize_sched(); /* Allow already-started NMIs to complete. */ -} - - -int __init op_nmi_timer_init(struct oprofile_operations *ops) -{ - ops->start = timer_start; - ops->stop = timer_stop; - ops->cpu_type = "timer"; - printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); - return 0; -} diff --git a/drivers/oprofile/nmi_timer_int.c b/drivers/oprofile/nmi_timer_int.c new file mode 100644 index 0000000..b76c6d7 --- /dev/null +++ b/drivers/oprofile/nmi_timer_int.c @@ -0,0 +1,172 @@ +/** + * @file nmi_timer_int.c + * + * @remark Copyright 2011 Advanced Micro Devices, Inc. + * + * @author Robert Richter <rob...@am...> + */ + +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/oprofile.h> +#include <linux/perf_event.h> + +#ifdef CONFIG_OPROFILE_NMI_TIMER + +static DEFINE_PER_CPU(struct perf_event *, nmi_timer_events); +static int ctr_running; + +static struct perf_event_attr nmi_timer_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + +static void nmi_timer_callback(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + event->hw.interrupts = 0; /* don't throttle interrupts */ + oprofile_add_sample(regs, 0); +} + +static int nmi_timer_start_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (!event) { + event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL, + nmi_timer_callback, NULL); + if (IS_ERR(event)) + return PTR_ERR(event); + per_cpu(nmi_timer_events, cpu) = event; + } + + if (event && ctr_running) + perf_event_enable(event); + + return 0; +} + +static void nmi_timer_stop_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (event && ctr_running) + perf_event_disable(event); +} + +static int nmi_timer_cpu_notifier(struct notifier_block *b, unsigned long action, + void *data) +{ + int cpu = (unsigned long)data; + switch (action) { + case CPU_DOWN_FAILED: + case CPU_ONLINE: + nmi_timer_start_cpu(cpu); + break; + case CPU_DOWN_PREPARE: + nmi_timer_stop_cpu(cpu); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block __cpuinitdata nmi_timer_cpu_nb = { + .notifier_call = nmi_timer_cpu_notifier +}; + +static int nmi_timer_start(void) +{ + int cpu; + + get_online_cpus(); + ctr_running = 1; + for_each_online_cpu(cpu) + nmi_timer_start_cpu(cpu); + put_online_cpus(); + + return 0; +} + +static void nmi_timer_stop(void) +{ + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) + nmi_timer_stop_cpu(cpu); + ctr_running = 0; + put_online_cpus(); +} + +static void nmi_timer_shutdown(void) +{ + struct perf_event *event; + int cpu; + + get_online_cpus(); + unregister_cpu_notifier(&nmi_timer_cpu_nb); + for_each_possible_cpu(cpu) { + event = per_cpu(nmi_timer_events, cpu); + if (!event) + continue; + perf_event_disable(event); + per_cpu(nmi_timer_events, cpu) = NULL; + perf_event_release_kernel(event); + } + + put_online_cpus(); +} + +static int nmi_timer_setup(void) +{ + int cpu, err; + + /* clock cycles per tick: */ + nmi_timer_attr.sample_period = (u64)cpu_khz * TICK_NSEC / NSEC_PER_MSEC; + pr_info("sample_period: %lld, cpu_khz: %d, nsec/tick: %ld\n", + nmi_timer_attr.sample_period, cpu_khz, TICK_NSEC); + + get_online_cpus(); + err = register_cpu_notifier(&nmi_timer_cpu_nb); + if (err) + goto out; + /* can't attach events to offline cpus: */ + for_each_online_cpu(cpu) { + err = nmi_timer_start_cpu(cpu); + if (err) + break; + } + if (err) + nmi_timer_shutdown(); +out: + put_online_cpus(); + return err; +} + +int __init op_nmi_timer_init(struct oprofile_operations *ops) +{ + int err = 0; + + err = nmi_timer_setup(); + if (err) + return err; + nmi_timer_shutdown(); /* only check, don't alloc */ + + ops->create_files = NULL; + ops->setup = nmi_timer_setup; + ops->shutdown = nmi_timer_shutdown; + ops->start = nmi_timer_start; + ops->stop = nmi_timer_stop; + ops->cpu_type = "timer"; + + printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); + + return 0; +} + +#endif diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index f7cd069..af9cbea 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -248,15 +248,21 @@ static int __init oprofile_init(void) /* always init architecture to setup backtrace support */ err = oprofile_arch_init(&oprofile_ops); - timer_mode = err || timer; /* fall back to timer mode on errors */ + timer_mode = err || timer; if (timer_mode) { if (!err) oprofile_arch_exit(); - err = oprofile_timer_init(&oprofile_ops); - if (err) - return err; + /* no nmi timer mode if oprofile.timer is set: */ + if (!timer) + err = op_nmi_timer_init(&oprofile_ops); + /* fall back to timer mode on errors: */ + if (err || timer) + err = oprofile_timer_init(&oprofile_ops); } + if (err) + return err; + err = oprofilefs_register(); if (!err) return 0; diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 177b73d..769fb0f 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -36,6 +36,15 @@ struct dentry; void oprofile_create_files(struct super_block *sb, struct dentry *root); int oprofile_timer_init(struct oprofile_operations *ops); void oprofile_timer_exit(void); +#ifdef CONFIG_OPROFILE_NMI_TIMER +int op_nmi_timer_init(struct oprofile_operations *ops); +#else +static inline int op_nmi_timer_init(struct oprofile_operations *ops) +{ + return -ENODEV; +} +#endif + int oprofile_set_ulong(unsigned long *addr, unsigned long val); int oprofile_set_timeout(unsigned long time); diff --git a/kernel/events/core.c b/kernel/events/core.c index d1a1bee..d2e28bd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1322,6 +1322,7 @@ retry: } raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_disable); static void perf_set_shadow_time(struct perf_event *event, struct perf_event_context *ctx, @@ -1806,6 +1807,7 @@ retry: out: raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_enable); int perf_event_refresh(struct perf_event *event, int refresh) { -- 1.7.7 |
From: Robert R. <rob...@am...> - 2011-10-19 18:08:12
|
See below for Some more comments after testing and reviewing the code. Will send a new version with these comments incorporated. -Robert On 17.10.11 10:27:04, Robert Richter wrote: > +static struct notifier_block __cpuinitdata nmi_timer_cpu_nb = { The __cpuinitdata attribute causes a section mismatch, will fix it. > + .notifier_call = nmi_timer_cpu_notifier > +}; > +static int nmi_timer_setup(void) > +{ > + int cpu, err; > + > + /* clock cycles per tick: */ > + nmi_timer_attr.sample_period = (u64)cpu_khz * TICK_NSEC / NSEC_PER_MSEC; This is broken on 32 bit. Will replace it with: /* clock cycles per tick: */ period = (u64)cpu_khz * 1000; do_div(period, HZ); nmi_timer_attr.sample_period = period; > + pr_info("sample_period: %lld, cpu_khz: %d, nsec/tick: %ld\n", > + nmi_timer_attr.sample_period, cpu_khz, TICK_NSEC); I will remove this pr_info(). > + > + get_online_cpus(); > + err = register_cpu_notifier(&nmi_timer_cpu_nb); > + if (err) > + goto out; > diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c > index f7cd069..af9cbea 100644 > --- a/drivers/oprofile/oprof.c > +++ b/drivers/oprofile/oprof.c > @@ -248,15 +248,21 @@ static int __init oprofile_init(void) > /* always init architecture to setup backtrace support */ > err = oprofile_arch_init(&oprofile_ops); > > - timer_mode = err || timer; /* fall back to timer mode on errors */ > + timer_mode = err || timer; > if (timer_mode) { > if (!err) > oprofile_arch_exit(); > - err = oprofile_timer_init(&oprofile_ops); > - if (err) > - return err; > + /* no nmi timer mode if oprofile.timer is set: */ > + if (!timer) > + err = op_nmi_timer_init(&oprofile_ops); As nmi timer is initialized here, we can remove it in arch/x86. This eases setup functions for x86. > + /* fall back to timer mode on errors: */ > + if (err || timer) > + err = oprofile_timer_init(&oprofile_ops); > } > > + if (err) > + return err; > + > err = oprofilefs_register(); > if (!err) > return 0; -- Advanced Micro Devices, Inc. Operating System Research Center |
From: Robert R. <rob...@am...> - 2011-10-17 14:35:37
|
Oprofile may crash in a KVM guest while unlaoding modules. This happens if oprofile_arch_init() fails and oprofile switches to the hr timer mode as a fallback. In this case oprofile_arch_exit() is called, but it never was initialized properly which causes the crash. This patch fixes this. oprofile: using timer interrupt. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 PGD 41da3f067 PUD 41d80e067 PMD 0 Oops: 0002 [#1] PREEMPT SMP CPU 5 Modules linked in: oprofile(-) Pid: 2382, comm: modprobe Not tainted 3.1.0-rc7-00018-g709a39d #18 Advanced Micro Device Anaheim/Anaheim RIP: 0010:[<ffffffff8123c226>] [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 RSP: 0018:ffff88041de1de98 EFLAGS: 00010296 RAX: 0000000000000000 RBX: ffffffffa00060e0 RCX: dead000000200200 RDX: 0000000000000000 RSI: dead000000100100 RDI: ffffffff8178c620 RBP: ffff88041de1dea8 R08: 0000000000000001 R09: 0000000000000082 R10: 0000000000000000 R11: ffff88041de1dde8 R12: 0000000000000080 R13: fffffffffffffff5 R14: 0000000000000001 R15: 0000000000610210 FS: 00007f9ae5bef700(0000) GS:ffff88042fd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000008 CR3: 000000041ca44000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 2382, threadinfo ffff88041de1c000, task ffff88042db6d040) Stack: ffff88041de1deb8 ffffffffa0006770 ffff88041de1deb8 ffffffffa000251e ffff88041de1dec8 ffffffffa00022c2 ffff88041de1ded8 ffffffffa0004993 ffff88041de1df78 ffffffff81073115 656c69666f72706f 0000000000610200 Call Trace: [<ffffffffa000251e>] op_nmi_exit+0x15/0x17 [oprofile] [<ffffffffa00022c2>] oprofile_arch_exit+0xe/0x10 [oprofile] [<ffffffffa0004993>] oprofile_exit+0x13/0x15 [oprofile] [<ffffffff81073115>] sys_delete_module+0x1c3/0x22f [<ffffffff811bf09e>] ? trace_hardirqs_on_thunk+0x3a/0x3f [<ffffffff8148070b>] system_call_fastpath+0x16/0x1b Code: 20 c6 78 81 e8 c5 cc 23 00 48 8b 13 48 8b 43 08 48 be 00 01 10 00 00 00 ad de 48 b9 00 02 20 00 00 00 ad de 48 c7 c7 20 c6 78 81 89 42 08 48 89 10 48 89 33 48 89 4b 08 e8 a6 c0 23 00 5a 5b RIP [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 RSP <ffff88041de1de98> CR2: 0000000000000008 ---[ end trace 06d4e95b6aa3b437 ]--- CC: st...@ke... # 2.6.37+ Signed-off-by: Robert Richter <rob...@am...> --- drivers/oprofile/oprof.c | 29 ++++++++++++++++++++++++----- drivers/oprofile/timer_int.c | 1 + 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index dccd863..f8c752e 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -239,26 +239,45 @@ int oprofile_set_ulong(unsigned long *addr, unsigned long val) return err; } +static int timer_mode; + static int __init oprofile_init(void) { int err; + /* always init architecture to setup backtrace support */ err = oprofile_arch_init(&oprofile_ops); - if (err < 0 || timer) { - printk(KERN_INFO "oprofile: using timer interrupt.\n"); + + timer_mode = err || timer; /* fall back to timer mode on errors */ + if (timer_mode) { + if (!err) + oprofile_arch_exit(); err = oprofile_timer_init(&oprofile_ops); if (err) return err; } - return oprofilefs_register(); + + err = oprofilefs_register(); + if (!err) + return 0; + + /* failed */ + if (timer_mode) + oprofile_timer_exit(); + else + oprofile_arch_exit(); + + return err; } static void __exit oprofile_exit(void) { - oprofile_timer_exit(); oprofilefs_unregister(); - oprofile_arch_exit(); + if (timer_mode) + oprofile_timer_exit(); + else + oprofile_arch_exit(); } diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 3ef4462..878fba1 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -110,6 +110,7 @@ int oprofile_timer_init(struct oprofile_operations *ops) ops->start = oprofile_hrtimer_start; ops->stop = oprofile_hrtimer_stop; ops->cpu_type = "timer"; + printk(KERN_INFO "oprofile: using timer interrupt.\n"); return 0; } -- 1.7.7 |
From: Ingo M. <mi...@el...> - 2011-10-18 06:14:10
|
* Robert Richter <rob...@am...> wrote: > + /* failed */ > + if (timer_mode) > + oprofile_timer_exit(); > + else > + oprofile_arch_exit(); > - oprofile_arch_exit(); > + if (timer_mode) > + oprofile_timer_exit(); > + else > + oprofile_arch_exit(); That's rather ugly ... Thanks, Ingo |
From: Robert R. <rob...@am...> - 2011-10-17 14:35:38
|
We need this to better test x86 NMI timer mode. Otherwise it is very hard to setup systems with NMI timer enabled, but we have this e.g. in virtual machine environments. Signed-off-by: Robert Richter <rob...@am...> --- Documentation/kernel-parameters.txt | 3 +++ arch/x86/oprofile/nmi_int.c | 27 +++++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 854ed5ca..f7735a1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1848,6 +1848,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. arch_perfmon: [X86] Force use of architectural perfmon on Intel CPUs instead of the CPU specific event set. + timer: [X86] Force use of architectural NMI + timer mode (see also oprofile.timer + for generic hr timer mode) oops=panic Always panic on oopses. Default is to just kill the process, but there is a small probability of diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 68894fd..7ca4d43 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -613,24 +613,36 @@ static int __init p4_init(char **cpu_type) return 0; } -static int force_arch_perfmon; -static int force_cpu_type(const char *str, struct kernel_param *kp) +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, + arch_perfmon, +}; + +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) { - if (!strcmp(str, "arch_perfmon")) { - force_arch_perfmon = 1; + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing NMI timer mode\n"); + } else if (!strcmp(str, "arch_perfmon")) { + force_cpu_type = arch_perfmon; printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); + } else { + force_cpu_type = 0; } return 0; } -module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ - if (force_arch_perfmon && cpu_has_arch_perfmon) + if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) return 0; /* @@ -697,6 +709,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (!cpu_has_apic) return -ENODEV; + if (force_cpu_type == timer) + return -ENODEV; + switch (vendor) { case X86_VENDOR_AMD: /* Needs to be at least an Athlon (or hammer in 32bit mode) */ -- 1.7.7 |
From: Robert R. <rob...@am...> - 2011-10-17 14:35:41
|
If oprofile uses the nmi timer interrupt there is a crash while unloading the module. The bug can be triggered with oprofile build as module and kernel parameter nolapic set. This patch fixes this. oprofile: using NMI timer interrupt. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 PGD 42dbca067 PUD 41da6a067 PMD 0 Oops: 0002 [#1] PREEMPT SMP CPU 5 Modules linked in: oprofile(-) [last unloaded: oprofile] Pid: 2518, comm: modprobe Not tainted 3.1.0-rc7-00019-gb2fb49d #19 Advanced Micro Device Anaheim/Anaheim RIP: 0010:[<ffffffff8123c226>] [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 RSP: 0018:ffff88041ef71e98 EFLAGS: 00010296 RAX: 0000000000000000 RBX: ffffffffa0017100 RCX: dead000000200200 RDX: 0000000000000000 RSI: dead000000100100 RDI: ffffffff8178c620 RBP: ffff88041ef71ea8 R08: 0000000000000001 R09: 0000000000000082 R10: 0000000000000000 R11: ffff88041ef71de8 R12: 0000000000000080 R13: fffffffffffffff5 R14: 0000000000000001 R15: 0000000000610210 FS: 00007fc902f20700(0000) GS:ffff88042fd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000008 CR3: 000000041cdb6000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 2518, threadinfo ffff88041ef70000, task ffff88041d348040) Stack: ffff88041ef71eb8 ffffffffa0017790 ffff88041ef71eb8 ffffffffa0013532 ffff88041ef71ec8 ffffffffa00132d6 ffff88041ef71ed8 ffffffffa00159b2 ffff88041ef71f78 ffffffff81073115 656c69666f72706f 0000000000610200 Call Trace: [<ffffffffa0013532>] op_nmi_exit+0x15/0x17 [oprofile] [<ffffffffa00132d6>] oprofile_arch_exit+0xe/0x10 [oprofile] [<ffffffffa00159b2>] oprofile_exit+0x1e/0x20 [oprofile] [<ffffffff81073115>] sys_delete_module+0x1c3/0x22f [<ffffffff811bf09e>] ? trace_hardirqs_on_thunk+0x3a/0x3f [<ffffffff8148070b>] system_call_fastpath+0x16/0x1b Code: 20 c6 78 81 e8 c5 cc 23 00 48 8b 13 48 8b 43 08 48 be 00 01 10 00 00 00 ad de 48 b9 00 02 20 00 00 00 ad de 48 c7 c7 20 c6 78 81 89 42 08 48 89 10 48 89 33 48 89 4b 08 e8 a6 c0 23 00 5a 5b RIP [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 RSP <ffff88041ef71e98> CR2: 0000000000000008 ---[ end trace 43a541a52956b7b0 ]--- CC: st...@ke... # 2.6.37+ Signed-off-by: Robert Richter <rob...@am...> --- arch/x86/oprofile/init.c | 7 +++++-- 1 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index cdfe4c5..f148cf6 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c @@ -21,6 +21,7 @@ extern int op_nmi_timer_init(struct oprofile_operations *ops); extern void op_nmi_exit(void); extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); +static int nmi_timer; int __init oprofile_arch_init(struct oprofile_operations *ops) { @@ -31,8 +32,9 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) #ifdef CONFIG_X86_LOCAL_APIC ret = op_nmi_init(ops); #endif + nmi_timer = (ret != 0); #ifdef CONFIG_X86_IO_APIC - if (ret < 0) + if (nmi_timer) ret = op_nmi_timer_init(ops); #endif ops->backtrace = x86_backtrace; @@ -44,6 +46,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) void oprofile_arch_exit(void) { #ifdef CONFIG_X86_LOCAL_APIC - op_nmi_exit(); + if (!nmi_timer) + op_nmi_exit(); #endif } -- 1.7.7 |
From: Robert R. <rob...@am...> - 2011-10-17 14:35:43
|
Remove exit functions by moving init/exit code to oprofile's setup/ shutdown functions. Doing so the oprofile module exit code will be easier and less error-prone. Signed-off-by: Robert Richter <rob...@am...> --- drivers/oprofile/oprof.c | 8 ++------ drivers/oprofile/timer_int.c | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index f8c752e..f7cd069 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -262,9 +262,7 @@ static int __init oprofile_init(void) return 0; /* failed */ - if (timer_mode) - oprofile_timer_exit(); - else + if (!timer_mode) oprofile_arch_exit(); return err; @@ -274,9 +272,7 @@ static int __init oprofile_init(void) static void __exit oprofile_exit(void) { oprofilefs_unregister(); - if (timer_mode) - oprofile_timer_exit(); - else + if (!timer_mode) oprofile_arch_exit(); } diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 878fba1..93404f7 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -97,24 +97,24 @@ static struct notifier_block __refdata oprofile_cpu_notifier = { .notifier_call = oprofile_cpu_notify, }; -int oprofile_timer_init(struct oprofile_operations *ops) +static int oprofile_hrtimer_setup(void) { - int rc; - - rc = register_hotcpu_notifier(&oprofile_cpu_notifier); - if (rc) - return rc; - ops->create_files = NULL; - ops->setup = NULL; - ops->shutdown = NULL; - ops->start = oprofile_hrtimer_start; - ops->stop = oprofile_hrtimer_stop; - ops->cpu_type = "timer"; - printk(KERN_INFO "oprofile: using timer interrupt.\n"); - return 0; + return register_hotcpu_notifier(&oprofile_cpu_notifier); } -void oprofile_timer_exit(void) +static void oprofile_hrtimer_shutdown(void) { unregister_hotcpu_notifier(&oprofile_cpu_notifier); } + +int oprofile_timer_init(struct oprofile_operations *ops) +{ + ops->create_files = NULL; + ops->setup = oprofile_hrtimer_setup; + ops->shutdown = oprofile_hrtimer_shutdown; + ops->start = oprofile_hrtimer_start; + ops->stop = oprofile_hrtimer_stop; + ops->cpu_type = "timer"; + printk(KERN_INFO "oprofile: using timer interrupt.\n"); + return 0; +} -- 1.7.7 |
From: Robert R. <rob...@am...> - 2011-10-17 15:22:37
|
Apologize the corrupted subject line in my emails, these are the correct patch titles: On 17.10.11 16:26:59, Robert Richter wrote: > Robert Richter (5): > oprofile, x86: Add kernel parameter oprofile.cpu_type=timer > oprofile: Fix crash when unloading module (hr timer mode) > oprofile, x86: Fix crash when unloading module (nmi timer mode) > oprofile: Remove exit function for timer mode > oprofile, x86: Reimplement nmi timer mode using perf event -Robert -- Advanced Micro Devices, Inc. Operating System Research Center |
From: Ingo M. <mi...@el...> - 2011-10-18 06:15:28
|
* Robert Richter <rob...@am...> wrote: > /* failed */ > - if (timer_mode) > - oprofile_timer_exit(); > - else > + if (!timer_mode) > oprofile_arch_exit(); > - if (timer_mode) > - oprofile_timer_exit(); > - else > + if (!timer_mode) > oprofile_arch_exit(); ... and the ugliness didnt improve much here either. Any reason why oprofile_arch_exit() couldnt check timer_mode? Thanks, Ingo |
From: Robert R. <rob...@am...> - 2011-10-19 16:35:39
|
On 18.10.11 02:13:47, Ingo Molnar wrote: > > * Robert Richter <rob...@am...> wrote: > > > /* failed */ > > - if (timer_mode) > > - oprofile_timer_exit(); > > - else > > + if (!timer_mode) > > oprofile_arch_exit(); > > > - if (timer_mode) > > - oprofile_timer_exit(); > > - else > > + if (!timer_mode) > > oprofile_arch_exit(); > > ... and the ugliness didnt improve much here either. > > Any reason why oprofile_arch_exit() couldnt check timer_mode? The two fixes are the smallest possible solution for stable kernels. ... and they don't look nice. Moving the check to oprofile_arch_exit() would mean to make timer_mode global and to touch all oprofile_arch_exit() functions for all archs. But we could get rid of timer_mode if the exit function would be part of struct oprofile_operations. For this we would have to touch the code for all architectures too, and I actually want to avoid this. But it would be a nice solution. Anyway, I have some improvements in patch #5 that make the code looks a bit better. See below for a preview, will post a v2 patch set after testing. As said, I could remove variable timer_mode completely by moving exit functions to oprofile_operations. This could be on top of the changes below. -Robert diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index f7cd069..d2994e7 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -246,26 +246,23 @@ static int __init oprofile_init(void) int err; /* always init architecture to setup backtrace support */ + timer_mode = 0; err = oprofile_arch_init(&oprofile_ops); - - timer_mode = err || timer; /* fall back to timer mode on errors */ - if (timer_mode) { - if (!err) - oprofile_arch_exit(); - err = oprofile_timer_init(&oprofile_ops); - if (err) - return err; + if (!err) { + if (!timer && oprofilefs_register()) + return 0; + oprofile_arch_exit(); } - err = oprofilefs_register(); - if (!err) - return 0; - - /* failed */ - if (!timer_mode) - oprofile_arch_exit(); + /* setup timer mode */ + timer_mode = 1; + err = op_nmi_timer_init(&oprofile_ops); + if (err) + err = oprofile_timer_init(&oprofile_ops); + if (err) + return err; - return err; + return oprofilefs_register(); } -- Advanced Micro Devices, Inc. Operating System Research Center |
From: Peter Z. <a.p...@ch...> - 2011-10-18 11:19:45
|
On Mon, 2011-10-17 at 16:27 +0200, Robert Richter wrote: > The legacy x86 nmi watchdog code was removed with the implementation > of the perf based nmi watchdog. This broke Oprofile's nmi timer > mode. To run nmi timer mode we relied on a continuous ticking nmi > source which the nmi watchdog provided. The nmi tick was no longer > available and current watchdog can not be used anymore since it runs > with very long periods in the range of seconds. This patch > reimplements the nmi timer mode using a perf counter nmi source. fair enough, Acked-by: Peter Zijlstra <a.p...@ch...> I take it you'll take this through your oprofile tree? |
From: Robert R. <rob...@am...> - 2011-10-19 16:53:53
|
On 18.10.11 06:59:41, Peter Zijlstra wrote: > On Mon, 2011-10-17 at 16:27 +0200, Robert Richter wrote: > > The legacy x86 nmi watchdog code was removed with the implementation > > of the perf based nmi watchdog. This broke Oprofile's nmi timer > > mode. To run nmi timer mode we relied on a continuous ticking nmi > > source which the nmi watchdog provided. The nmi tick was no longer > > available and current watchdog can not be used anymore since it runs > > with very long periods in the range of seconds. This patch > > reimplements the nmi timer mode using a perf counter nmi source. > > fair enough, > > Acked-by: Peter Zijlstra <a.p...@ch...> > > I take it you'll take this through your oprofile tree? Yes, will send it through my tree. Thanks, -Robert -- Advanced Micro Devices, Inc. Operating System Research Center |
From: Robert R. <rob...@am...> - 2011-10-19 18:48:41
|
This patch series fixes and updates hr and nmi timer modes of oprofile. I got bug reports of crashes while unloading the oprofile module. There are two fixes that address this. The fixes are also for linux-stable. Another patch reworks the nmi timer mode. This was neccessary due to the removal of the x86 watchdog. Now nmi timer mode uses perf event to setup the nmi tick source. The other patches improve testability and ease exit code. After review I will apply the patches to the oprofile tree. Thanks, -Robert V2: Only changes in patch #5: * removing pr_info() * fix undefined reference to `__udivdi3' for 32 bit build * fix section mismatch of .cpuinit.data:nmi_timer_cpu_nb * removed nmi timer setup in arch/x86 * implemented function stubs for op_nmi_init/exit() * made code more readable in oprofile_init() Robert Richter (5): oprofile, x86: Add kernel parameter oprofile.cpu_type=timer oprofile: Fix crash when unloading module (hr timer mode) oprofile, x86: Fix crash when unloading module (nmi timer mode) oprofile: Remove exit function for timer mode oprofile, x86: Reimplement nmi timer mode using perf event Documentation/kernel-parameters.txt | 3 + arch/Kconfig | 4 + arch/x86/oprofile/Makefile | 3 +- arch/x86/oprofile/init.c | 25 ++---- arch/x86/oprofile/nmi_int.c | 27 ++++- arch/x86/oprofile/nmi_timer_int.c | 66 ------------- drivers/oprofile/nmi_timer_int.c | 173 +++++++++++++++++++++++++++++++++++ drivers/oprofile/oprof.c | 21 ++++- drivers/oprofile/oprof.h | 9 ++ drivers/oprofile/timer_int.c | 29 +++--- kernel/events/core.c | 2 + 11 files changed, 252 insertions(+), 110 deletions(-) delete mode 100644 arch/x86/oprofile/nmi_timer_int.c create mode 100644 drivers/oprofile/nmi_timer_int.c -- 1.7.7 |
From: Robert R. <rob...@am...> - 2011-10-19 18:49:01
|
Oprofile may crash in a KVM guest while unlaoding modules. This happens if oprofile_arch_init() fails and oprofile switches to the hr timer mode as a fallback. In this case oprofile_arch_exit() is called, but it never was initialized properly which causes the crash. This patch fixes this. oprofile: using timer interrupt. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 PGD 41da3f067 PUD 41d80e067 PMD 0 Oops: 0002 [#1] PREEMPT SMP CPU 5 Modules linked in: oprofile(-) Pid: 2382, comm: modprobe Not tainted 3.1.0-rc7-00018-g709a39d #18 Advanced Micro Device Anaheim/Anaheim RIP: 0010:[<ffffffff8123c226>] [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 RSP: 0018:ffff88041de1de98 EFLAGS: 00010296 RAX: 0000000000000000 RBX: ffffffffa00060e0 RCX: dead000000200200 RDX: 0000000000000000 RSI: dead000000100100 RDI: ffffffff8178c620 RBP: ffff88041de1dea8 R08: 0000000000000001 R09: 0000000000000082 R10: 0000000000000000 R11: ffff88041de1dde8 R12: 0000000000000080 R13: fffffffffffffff5 R14: 0000000000000001 R15: 0000000000610210 FS: 00007f9ae5bef700(0000) GS:ffff88042fd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000008 CR3: 000000041ca44000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 2382, threadinfo ffff88041de1c000, task ffff88042db6d040) Stack: ffff88041de1deb8 ffffffffa0006770 ffff88041de1deb8 ffffffffa000251e ffff88041de1dec8 ffffffffa00022c2 ffff88041de1ded8 ffffffffa0004993 ffff88041de1df78 ffffffff81073115 656c69666f72706f 0000000000610200 Call Trace: [<ffffffffa000251e>] op_nmi_exit+0x15/0x17 [oprofile] [<ffffffffa00022c2>] oprofile_arch_exit+0xe/0x10 [oprofile] [<ffffffffa0004993>] oprofile_exit+0x13/0x15 [oprofile] [<ffffffff81073115>] sys_delete_module+0x1c3/0x22f [<ffffffff811bf09e>] ? trace_hardirqs_on_thunk+0x3a/0x3f [<ffffffff8148070b>] system_call_fastpath+0x16/0x1b Code: 20 c6 78 81 e8 c5 cc 23 00 48 8b 13 48 8b 43 08 48 be 00 01 10 00 00 00 ad de 48 b9 00 02 20 00 00 00 ad de 48 c7 c7 20 c6 78 81 89 42 08 48 89 10 48 89 33 48 89 4b 08 e8 a6 c0 23 00 5a 5b RIP [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 RSP <ffff88041de1de98> CR2: 0000000000000008 ---[ end trace 06d4e95b6aa3b437 ]--- CC: st...@ke... # 2.6.37+ Signed-off-by: Robert Richter <rob...@am...> --- drivers/oprofile/oprof.c | 29 ++++++++++++++++++++++++----- drivers/oprofile/timer_int.c | 1 + 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index dccd863..f8c752e 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -239,26 +239,45 @@ int oprofile_set_ulong(unsigned long *addr, unsigned long val) return err; } +static int timer_mode; + static int __init oprofile_init(void) { int err; + /* always init architecture to setup backtrace support */ err = oprofile_arch_init(&oprofile_ops); - if (err < 0 || timer) { - printk(KERN_INFO "oprofile: using timer interrupt.\n"); + + timer_mode = err || timer; /* fall back to timer mode on errors */ + if (timer_mode) { + if (!err) + oprofile_arch_exit(); err = oprofile_timer_init(&oprofile_ops); if (err) return err; } - return oprofilefs_register(); + + err = oprofilefs_register(); + if (!err) + return 0; + + /* failed */ + if (timer_mode) + oprofile_timer_exit(); + else + oprofile_arch_exit(); + + return err; } static void __exit oprofile_exit(void) { - oprofile_timer_exit(); oprofilefs_unregister(); - oprofile_arch_exit(); + if (timer_mode) + oprofile_timer_exit(); + else + oprofile_arch_exit(); } diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 3ef4462..878fba1 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -110,6 +110,7 @@ int oprofile_timer_init(struct oprofile_operations *ops) ops->start = oprofile_hrtimer_start; ops->stop = oprofile_hrtimer_stop; ops->cpu_type = "timer"; + printk(KERN_INFO "oprofile: using timer interrupt.\n"); return 0; } -- 1.7.7 |
From: Robert R. <rob...@am...> - 2011-10-19 18:49:04
|
If oprofile uses the nmi timer interrupt there is a crash while unloading the module. The bug can be triggered with oprofile build as module and kernel parameter nolapic set. This patch fixes this. oprofile: using NMI timer interrupt. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 PGD 42dbca067 PUD 41da6a067 PMD 0 Oops: 0002 [#1] PREEMPT SMP CPU 5 Modules linked in: oprofile(-) [last unloaded: oprofile] Pid: 2518, comm: modprobe Not tainted 3.1.0-rc7-00019-gb2fb49d #19 Advanced Micro Device Anaheim/Anaheim RIP: 0010:[<ffffffff8123c226>] [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 RSP: 0018:ffff88041ef71e98 EFLAGS: 00010296 RAX: 0000000000000000 RBX: ffffffffa0017100 RCX: dead000000200200 RDX: 0000000000000000 RSI: dead000000100100 RDI: ffffffff8178c620 RBP: ffff88041ef71ea8 R08: 0000000000000001 R09: 0000000000000082 R10: 0000000000000000 R11: ffff88041ef71de8 R12: 0000000000000080 R13: fffffffffffffff5 R14: 0000000000000001 R15: 0000000000610210 FS: 00007fc902f20700(0000) GS:ffff88042fd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000008 CR3: 000000041cdb6000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process modprobe (pid: 2518, threadinfo ffff88041ef70000, task ffff88041d348040) Stack: ffff88041ef71eb8 ffffffffa0017790 ffff88041ef71eb8 ffffffffa0013532 ffff88041ef71ec8 ffffffffa00132d6 ffff88041ef71ed8 ffffffffa00159b2 ffff88041ef71f78 ffffffff81073115 656c69666f72706f 0000000000610200 Call Trace: [<ffffffffa0013532>] op_nmi_exit+0x15/0x17 [oprofile] [<ffffffffa00132d6>] oprofile_arch_exit+0xe/0x10 [oprofile] [<ffffffffa00159b2>] oprofile_exit+0x1e/0x20 [oprofile] [<ffffffff81073115>] sys_delete_module+0x1c3/0x22f [<ffffffff811bf09e>] ? trace_hardirqs_on_thunk+0x3a/0x3f [<ffffffff8148070b>] system_call_fastpath+0x16/0x1b Code: 20 c6 78 81 e8 c5 cc 23 00 48 8b 13 48 8b 43 08 48 be 00 01 10 00 00 00 ad de 48 b9 00 02 20 00 00 00 ad de 48 c7 c7 20 c6 78 81 89 42 08 48 89 10 48 89 33 48 89 4b 08 e8 a6 c0 23 00 5a 5b RIP [<ffffffff8123c226>] unregister_syscore_ops+0x41/0x58 RSP <ffff88041ef71e98> CR2: 0000000000000008 ---[ end trace 43a541a52956b7b0 ]--- CC: st...@ke... # 2.6.37+ Signed-off-by: Robert Richter <rob...@am...> --- arch/x86/oprofile/init.c | 7 +++++-- 1 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index cdfe4c5..f148cf6 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c @@ -21,6 +21,7 @@ extern int op_nmi_timer_init(struct oprofile_operations *ops); extern void op_nmi_exit(void); extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); +static int nmi_timer; int __init oprofile_arch_init(struct oprofile_operations *ops) { @@ -31,8 +32,9 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) #ifdef CONFIG_X86_LOCAL_APIC ret = op_nmi_init(ops); #endif + nmi_timer = (ret != 0); #ifdef CONFIG_X86_IO_APIC - if (ret < 0) + if (nmi_timer) ret = op_nmi_timer_init(ops); #endif ops->backtrace = x86_backtrace; @@ -44,6 +46,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) void oprofile_arch_exit(void) { #ifdef CONFIG_X86_LOCAL_APIC - op_nmi_exit(); + if (!nmi_timer) + op_nmi_exit(); #endif } -- 1.7.7 |
From: Robert R. <rob...@am...> - 2011-10-19 18:49:09
|
The legacy x86 nmi watchdog code was removed with the implementation of the perf based nmi watchdog. This broke Oprofile's nmi timer mode. To run nmi timer mode we relied on a continuous ticking nmi source which the nmi watchdog provided. The nmi tick was no longer available and current watchdog can not be used anymore since it runs with very long periods in the range of seconds. This patch reimplements the nmi timer mode using a perf counter nmi source. V2: * removing pr_info() * fix undefined reference to `__udivdi3' for 32 bit build * fix section mismatch of .cpuinit.data:nmi_timer_cpu_nb * removed nmi timer setup in arch/x86 * implemented function stubs for op_nmi_init/exit() * made code more readable in oprofile_init() Cc: Peter Zijlstra <a.p...@ch...> Signed-off-by: Robert Richter <rob...@am...> --- arch/Kconfig | 4 + arch/x86/oprofile/Makefile | 3 +- arch/x86/oprofile/init.c | 30 ++----- arch/x86/oprofile/nmi_timer_int.c | 66 -------------- drivers/oprofile/nmi_timer_int.c | 173 +++++++++++++++++++++++++++++++++++++ drivers/oprofile/oprof.c | 24 +++--- drivers/oprofile/oprof.h | 9 ++ kernel/events/core.c | 2 + 8 files changed, 208 insertions(+), 103 deletions(-) delete mode 100644 arch/x86/oprofile/nmi_timer_int.c create mode 100644 drivers/oprofile/nmi_timer_int.c diff --git a/arch/Kconfig b/arch/Kconfig index 4b0669c..f01f75c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX config HAVE_OPROFILE bool +config OPROFILE_NMI_TIMER + def_bool y + def_bool PERF_EVENTS && HAVE_PERF_EVENTS_NMI + config KPROBES bool "Kprobes" depends on MODULES diff --git a/arch/x86/oprofile/Makefile b/arch/x86/oprofile/Makefile index 446902b..1599f56 100644 --- a/arch/x86/oprofile/Makefile +++ b/arch/x86/oprofile/Makefile @@ -4,9 +4,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprof.o cpu_buffer.o buffer_sync.o \ event_buffer.o oprofile_files.o \ oprofilefs.o oprofile_stats.o \ - timer_int.o ) + timer_int.o nmi_timer_int.o ) oprofile-y := $(DRIVER_OBJS) init.o backtrace.o oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_amd.o \ op_model_ppro.o op_model_p4.o -oprofile-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c index f148cf6..9e138d0 100644 --- a/arch/x86/oprofile/init.c +++ b/arch/x86/oprofile/init.c @@ -16,37 +16,23 @@ * with the NMI mode driver. */ +#ifdef CONFIG_X86_LOCAL_APIC extern int op_nmi_init(struct oprofile_operations *ops); -extern int op_nmi_timer_init(struct oprofile_operations *ops); extern void op_nmi_exit(void); -extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); +#else +static int op_nmi_init(struct oprofile_operations *ops) { return -ENODEV; } +static void op_nmi_exit(void) { } +#endif -static int nmi_timer; +extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); int __init oprofile_arch_init(struct oprofile_operations *ops) { - int ret; - - ret = -ENODEV; - -#ifdef CONFIG_X86_LOCAL_APIC - ret = op_nmi_init(ops); -#endif - nmi_timer = (ret != 0); -#ifdef CONFIG_X86_IO_APIC - if (nmi_timer) - ret = op_nmi_timer_init(ops); -#endif ops->backtrace = x86_backtrace; - - return ret; + return op_nmi_init(ops); } - void oprofile_arch_exit(void) { -#ifdef CONFIG_X86_LOCAL_APIC - if (!nmi_timer) - op_nmi_exit(); -#endif + op_nmi_exit(); } diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c deleted file mode 100644 index 720bf5a..0000000 --- a/arch/x86/oprofile/nmi_timer_int.c +++ /dev/null @@ -1,66 +0,0 @@ -/** - * @file nmi_timer_int.c - * - * @remark Copyright 2003 OProfile authors - * @remark Read the file COPYING - * - * @author Zwane Mwaikambo <zw...@li...> - */ - -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/errno.h> -#include <linux/oprofile.h> -#include <linux/rcupdate.h> -#include <linux/kdebug.h> - -#include <asm/nmi.h> -#include <asm/apic.h> -#include <asm/ptrace.h> - -static int profile_timer_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct die_args *args = (struct die_args *)data; - int ret = NOTIFY_DONE; - - switch (val) { - case DIE_NMI: - oprofile_add_sample(args->regs, 0); - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - -static struct notifier_block profile_timer_exceptions_nb = { - .notifier_call = profile_timer_exceptions_notify, - .next = NULL, - .priority = NMI_LOW_PRIOR, -}; - -static int timer_start(void) -{ - if (register_die_notifier(&profile_timer_exceptions_nb)) - return 1; - return 0; -} - - -static void timer_stop(void) -{ - unregister_die_notifier(&profile_timer_exceptions_nb); - synchronize_sched(); /* Allow already-started NMIs to complete. */ -} - - -int __init op_nmi_timer_init(struct oprofile_operations *ops) -{ - ops->start = timer_start; - ops->stop = timer_stop; - ops->cpu_type = "timer"; - printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); - return 0; -} diff --git a/drivers/oprofile/nmi_timer_int.c b/drivers/oprofile/nmi_timer_int.c new file mode 100644 index 0000000..76f1c93 --- /dev/null +++ b/drivers/oprofile/nmi_timer_int.c @@ -0,0 +1,173 @@ +/** + * @file nmi_timer_int.c + * + * @remark Copyright 2011 Advanced Micro Devices, Inc. + * + * @author Robert Richter <rob...@am...> + */ + +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/oprofile.h> +#include <linux/perf_event.h> + +#ifdef CONFIG_OPROFILE_NMI_TIMER + +static DEFINE_PER_CPU(struct perf_event *, nmi_timer_events); +static int ctr_running; + +static struct perf_event_attr nmi_timer_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + +static void nmi_timer_callback(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + event->hw.interrupts = 0; /* don't throttle interrupts */ + oprofile_add_sample(regs, 0); +} + +static int nmi_timer_start_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (!event) { + event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL, + nmi_timer_callback, NULL); + if (IS_ERR(event)) + return PTR_ERR(event); + per_cpu(nmi_timer_events, cpu) = event; + } + + if (event && ctr_running) + perf_event_enable(event); + + return 0; +} + +static void nmi_timer_stop_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (event && ctr_running) + perf_event_disable(event); +} + +static int nmi_timer_cpu_notifier(struct notifier_block *b, unsigned long action, + void *data) +{ + int cpu = (unsigned long)data; + switch (action) { + case CPU_DOWN_FAILED: + case CPU_ONLINE: + nmi_timer_start_cpu(cpu); + break; + case CPU_DOWN_PREPARE: + nmi_timer_stop_cpu(cpu); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block nmi_timer_cpu_nb = { + .notifier_call = nmi_timer_cpu_notifier +}; + +static int nmi_timer_start(void) +{ + int cpu; + + get_online_cpus(); + ctr_running = 1; + for_each_online_cpu(cpu) + nmi_timer_start_cpu(cpu); + put_online_cpus(); + + return 0; +} + +static void nmi_timer_stop(void) +{ + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) + nmi_timer_stop_cpu(cpu); + ctr_running = 0; + put_online_cpus(); +} + +static void nmi_timer_shutdown(void) +{ + struct perf_event *event; + int cpu; + + get_online_cpus(); + unregister_cpu_notifier(&nmi_timer_cpu_nb); + for_each_possible_cpu(cpu) { + event = per_cpu(nmi_timer_events, cpu); + if (!event) + continue; + perf_event_disable(event); + per_cpu(nmi_timer_events, cpu) = NULL; + perf_event_release_kernel(event); + } + + put_online_cpus(); +} + +static int nmi_timer_setup(void) +{ + int cpu, err; + u64 period; + + /* clock cycles per tick: */ + period = (u64)cpu_khz * 1000; + do_div(period, HZ); + nmi_timer_attr.sample_period = period; + + get_online_cpus(); + err = register_cpu_notifier(&nmi_timer_cpu_nb); + if (err) + goto out; + /* can't attach events to offline cpus: */ + for_each_online_cpu(cpu) { + err = nmi_timer_start_cpu(cpu); + if (err) + break; + } + if (err) + nmi_timer_shutdown(); +out: + put_online_cpus(); + return err; +} + +int __init op_nmi_timer_init(struct oprofile_operations *ops) +{ + int err = 0; + + err = nmi_timer_setup(); + if (err) + return err; + nmi_timer_shutdown(); /* only check, don't alloc */ + + ops->create_files = NULL; + ops->setup = nmi_timer_setup; + ops->shutdown = nmi_timer_shutdown; + ops->start = nmi_timer_start; + ops->stop = nmi_timer_stop; + ops->cpu_type = "timer"; + + printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); + + return 0; +} + +#endif diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index f7cd069..2edab29 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -246,26 +246,24 @@ static int __init oprofile_init(void) int err; /* always init architecture to setup backtrace support */ + timer_mode = 0; err = oprofile_arch_init(&oprofile_ops); + if (!err) { + if (!timer && oprofilefs_register()) + return 0; + oprofile_arch_exit(); + } - timer_mode = err || timer; /* fall back to timer mode on errors */ - if (timer_mode) { - if (!err) - oprofile_arch_exit(); + /* setup timer mode: */ + timer_mode = 1; + /* no nmi timer mode if oprofile.timer is set */ + if (timer || op_nmi_timer_init(&oprofile_ops)) { err = oprofile_timer_init(&oprofile_ops); if (err) return err; } - err = oprofilefs_register(); - if (!err) - return 0; - - /* failed */ - if (!timer_mode) - oprofile_arch_exit(); - - return err; + return oprofilefs_register(); } diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 177b73d..769fb0f 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -36,6 +36,15 @@ struct dentry; void oprofile_create_files(struct super_block *sb, struct dentry *root); int oprofile_timer_init(struct oprofile_operations *ops); void oprofile_timer_exit(void); +#ifdef CONFIG_OPROFILE_NMI_TIMER +int op_nmi_timer_init(struct oprofile_operations *ops); +#else +static inline int op_nmi_timer_init(struct oprofile_operations *ops) +{ + return -ENODEV; +} +#endif + int oprofile_set_ulong(unsigned long *addr, unsigned long val); int oprofile_set_timeout(unsigned long time); diff --git a/kernel/events/core.c b/kernel/events/core.c index d1a1bee..d2e28bd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1322,6 +1322,7 @@ retry: } raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_disable); static void perf_set_shadow_time(struct perf_event *event, struct perf_event_context *ctx, @@ -1806,6 +1807,7 @@ retry: out: raw_spin_unlock_irq(&ctx->lock); } +EXPORT_SYMBOL_GPL(perf_event_enable); int perf_event_refresh(struct perf_event *event, int refresh) { -- 1.7.7 |
From: Robert R. <rob...@am...> - 2011-10-19 18:49:50
|
Remove exit functions by moving init/exit code to oprofile's setup/ shutdown functions. Doing so the oprofile module exit code will be easier and less error-prone. Signed-off-by: Robert Richter <rob...@am...> --- drivers/oprofile/oprof.c | 8 ++------ drivers/oprofile/timer_int.c | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index f8c752e..f7cd069 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -262,9 +262,7 @@ static int __init oprofile_init(void) return 0; /* failed */ - if (timer_mode) - oprofile_timer_exit(); - else + if (!timer_mode) oprofile_arch_exit(); return err; @@ -274,9 +272,7 @@ static int __init oprofile_init(void) static void __exit oprofile_exit(void) { oprofilefs_unregister(); - if (timer_mode) - oprofile_timer_exit(); - else + if (!timer_mode) oprofile_arch_exit(); } diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 878fba1..93404f7 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -97,24 +97,24 @@ static struct notifier_block __refdata oprofile_cpu_notifier = { .notifier_call = oprofile_cpu_notify, }; -int oprofile_timer_init(struct oprofile_operations *ops) +static int oprofile_hrtimer_setup(void) { - int rc; - - rc = register_hotcpu_notifier(&oprofile_cpu_notifier); - if (rc) - return rc; - ops->create_files = NULL; - ops->setup = NULL; - ops->shutdown = NULL; - ops->start = oprofile_hrtimer_start; - ops->stop = oprofile_hrtimer_stop; - ops->cpu_type = "timer"; - printk(KERN_INFO "oprofile: using timer interrupt.\n"); - return 0; + return register_hotcpu_notifier(&oprofile_cpu_notifier); } -void oprofile_timer_exit(void) +static void oprofile_hrtimer_shutdown(void) { unregister_hotcpu_notifier(&oprofile_cpu_notifier); } + +int oprofile_timer_init(struct oprofile_operations *ops) +{ + ops->create_files = NULL; + ops->setup = oprofile_hrtimer_setup; + ops->shutdown = oprofile_hrtimer_shutdown; + ops->start = oprofile_hrtimer_start; + ops->stop = oprofile_hrtimer_stop; + ops->cpu_type = "timer"; + printk(KERN_INFO "oprofile: using timer interrupt.\n"); + return 0; +} -- 1.7.7 |
From: Robert R. <rob...@am...> - 2011-10-19 18:49:51
|
We need this to better test x86 NMI timer mode. Otherwise it is very hard to setup systems with NMI timer enabled, but we have this e.g. in virtual machine environments. Signed-off-by: Robert Richter <rob...@am...> --- Documentation/kernel-parameters.txt | 3 +++ arch/x86/oprofile/nmi_int.c | 27 +++++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 854ed5ca..f7735a1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1848,6 +1848,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. arch_perfmon: [X86] Force use of architectural perfmon on Intel CPUs instead of the CPU specific event set. + timer: [X86] Force use of architectural NMI + timer mode (see also oprofile.timer + for generic hr timer mode) oops=panic Always panic on oopses. Default is to just kill the process, but there is a small probability of diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 68894fd..7ca4d43 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -613,24 +613,36 @@ static int __init p4_init(char **cpu_type) return 0; } -static int force_arch_perfmon; -static int force_cpu_type(const char *str, struct kernel_param *kp) +enum __force_cpu_type { + reserved = 0, /* do not force */ + timer, + arch_perfmon, +}; + +static int force_cpu_type; + +static int set_cpu_type(const char *str, struct kernel_param *kp) { - if (!strcmp(str, "arch_perfmon")) { - force_arch_perfmon = 1; + if (!strcmp(str, "timer")) { + force_cpu_type = timer; + printk(KERN_INFO "oprofile: forcing NMI timer mode\n"); + } else if (!strcmp(str, "arch_perfmon")) { + force_cpu_type = arch_perfmon; printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); + } else { + force_cpu_type = 0; } return 0; } -module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); +module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ - if (force_arch_perfmon && cpu_has_arch_perfmon) + if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon) return 0; /* @@ -697,6 +709,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) if (!cpu_has_apic) return -ENODEV; + if (force_cpu_type == timer) + return -ENODEV; + switch (vendor) { case X86_VENDOR_AMD: /* Needs to be at least an Athlon (or hammer in 32bit mode) */ -- 1.7.7 |
From: Ingo M. <mi...@el...> - 2011-10-23 11:29:13
|
* Robert Richter <rob...@am...> wrote: > The legacy x86 nmi watchdog code was removed with the implementation > of the perf based nmi watchdog. This broke Oprofile's nmi timer > mode. To run nmi timer mode we relied on a continuous ticking nmi > source which the nmi watchdog provided. The nmi tick was no longer > available and current watchdog can not be used anymore since it runs > with very long periods in the range of seconds. This patch > reimplements the nmi timer mode using a perf counter nmi source. > > V2: > * removing pr_info() > * fix undefined reference to `__udivdi3' for 32 bit build > * fix section mismatch of .cpuinit.data:nmi_timer_cpu_nb > * removed nmi timer setup in arch/x86 > * implemented function stubs for op_nmi_init/exit() > * made code more readable in oprofile_init() > > Cc: Peter Zijlstra <a.p...@ch...> > Signed-off-by: Robert Richter <rob...@am...> > --- > arch/Kconfig | 4 + > arch/x86/oprofile/Makefile | 3 +- > arch/x86/oprofile/init.c | 30 ++----- > arch/x86/oprofile/nmi_timer_int.c | 66 -------------- > drivers/oprofile/nmi_timer_int.c | 173 +++++++++++++++++++++++++++++++++++++ > drivers/oprofile/oprof.c | 24 +++--- > drivers/oprofile/oprof.h | 9 ++ > kernel/events/core.c | 2 + > 8 files changed, 208 insertions(+), 103 deletions(-) > delete mode 100644 arch/x86/oprofile/nmi_timer_int.c > create mode 100644 drivers/oprofile/nmi_timer_int.c Looks clean to me. Peter, any objections? Thanks, Ingo |
From: Robert R. <rob...@am...> - 2011-11-01 17:54:22
|
On 23.10.11 07:27:28, Ingo Molnar wrote: > > * Robert Richter <rob...@am...> wrote: > > > The legacy x86 nmi watchdog code was removed with the implementation > > of the perf based nmi watchdog. This broke Oprofile's nmi timer > > mode. To run nmi timer mode we relied on a continuous ticking nmi > > source which the nmi watchdog provided. The nmi tick was no longer > > available and current watchdog can not be used anymore since it runs > > with very long periods in the range of seconds. This patch > > reimplements the nmi timer mode using a perf counter nmi source. > > > > V2: > > * removing pr_info() > > * fix undefined reference to `__udivdi3' for 32 bit build > > * fix section mismatch of .cpuinit.data:nmi_timer_cpu_nb > > * removed nmi timer setup in arch/x86 > > * implemented function stubs for op_nmi_init/exit() > > * made code more readable in oprofile_init() > > > > Cc: Peter Zijlstra <a.p...@ch...> > > Signed-off-by: Robert Richter <rob...@am...> > > --- > > arch/Kconfig | 4 + > > arch/x86/oprofile/Makefile | 3 +- > > arch/x86/oprofile/init.c | 30 ++----- > > arch/x86/oprofile/nmi_timer_int.c | 66 -------------- > > drivers/oprofile/nmi_timer_int.c | 173 +++++++++++++++++++++++++++++++++++++ > > drivers/oprofile/oprof.c | 24 +++--- > > drivers/oprofile/oprof.h | 9 ++ > > kernel/events/core.c | 2 + > > 8 files changed, 208 insertions(+), 103 deletions(-) > > delete mode 100644 arch/x86/oprofile/nmi_timer_int.c > > create mode 100644 drivers/oprofile/nmi_timer_int.c > > Looks clean to me. Peter, any objections? Peter, I guess V2 is ok for you too? Thanks, -Robert -- Advanced Micro Devices, Inc. Operating System Research Center |
From: Peter Z. <a.p...@ch...> - 2011-11-10 13:54:48
|
On Sun, 2011-10-23 at 13:27 +0200, Ingo Molnar wrote: > * Robert Richter <rob...@am...> wrote: > > > The legacy x86 nmi watchdog code was removed with the implementation > > of the perf based nmi watchdog. This broke Oprofile's nmi timer > > mode. To run nmi timer mode we relied on a continuous ticking nmi > > source which the nmi watchdog provided. The nmi tick was no longer > > available and current watchdog can not be used anymore since it runs > > with very long periods in the range of seconds. This patch > > reimplements the nmi timer mode using a perf counter nmi source. > > > Looks clean to me. Peter, any objections? No. |