From: Santosh S. <san...@ti...> - 2010-10-23 12:26:08
|
The kernel build with CONFIG_OPROFILE and CPU_HOTPLUG enabled. The oprofile is initialised using system timer in absence of hardware counters supports. Oprofile isn't started from userland. In this setup while doing a CPU offline the kernel hangs in infinite for loop inside lock_hrtimer_base() function This happens because as part of oprofile_cpu_notify(, it tries to stop an hrtimer which was never started. These per-cpu hrtimers are started when the oprfile is started. echo 1 > /dev/oprofile/enable This patch fix this issue by adding a state variable so that these hrtimer start/stop is only attempted when oprofile is started Reported-by: Jan Sebastien <s-...@ti...> Signed-off-by: sricharan <r.s...@ti...> Tested-by: sricharan <r.s...@ti...> Signed-off-by: Santosh Shilimkar <san...@ti...> Cc: Robert Richter <rob...@am...> --- drivers/oprofile/timer_int.c | 9 +++++++++ 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index dc0ae4d..de487ba 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -21,6 +21,7 @@ #include "oprof.h" static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer); +static int oprofile_hrtimer_started; static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer) { @@ -33,6 +34,9 @@ static void __oprofile_hrtimer_start(void *unused) { struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer); + if (!oprofile_hrtimer_started) + return; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = oprofile_hrtimer_notify; @@ -42,6 +46,7 @@ static void __oprofile_hrtimer_start(void *unused) static int oprofile_hrtimer_start(void) { + oprofile_hrtimer_started = 1; on_each_cpu(__oprofile_hrtimer_start, NULL, 1); return 0; } @@ -50,6 +55,9 @@ static void __oprofile_hrtimer_stop(int cpu) { struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu); + if (!oprofile_hrtimer_started) + return; + hrtimer_cancel(hrtimer); } @@ -59,6 +67,7 @@ static void oprofile_hrtimer_stop(void) for_each_online_cpu(cpu) __oprofile_hrtimer_stop(cpu); + oprofile_hrtimer_started = 0; } static int __cpuinit oprofile_cpu_notify(struct notifier_block *self, -- 1.6.0.4 |
From: Robert R. <rob...@am...> - 2010-10-27 08:07:10
|
On 23.10.10 08:12:06, Santosh Shilimkar wrote: > The kernel build with CONFIG_OPROFILE and CPU_HOTPLUG enabled. > The oprofile is initialised using system timer in absence of hardware > counters supports. Oprofile isn't started from userland. > > In this setup while doing a CPU offline the kernel hangs in infinite > for loop inside lock_hrtimer_base() function > > This happens because as part of oprofile_cpu_notify(, it tries to > stop an hrtimer which was never started. These per-cpu hrtimers > are started when the oprfile is started. > echo 1 > /dev/oprofile/enable Indeed, this is true. There is also the case vice versa, the cpu is booted with maxcpus parameter set. When bringing the remaining cpus online the timers are started even if oprofile is not yet enabled. > > This patch fix this issue by adding a state variable so that > these hrtimer start/stop is only attempted when oprofile is > started > > Reported-by: Jan Sebastien <s-...@ti...> > Signed-off-by: sricharan <r.s...@ti...> > Tested-by: sricharan <r.s...@ti...> > Signed-off-by: Santosh Shilimkar <san...@ti...> > > Cc: Robert Richter <rob...@am...> > --- > drivers/oprofile/timer_int.c | 9 +++++++++ > 1 files changed, 9 insertions(+), 0 deletions(-) > > diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c > index dc0ae4d..de487ba 100644 > --- a/drivers/oprofile/timer_int.c > +++ b/drivers/oprofile/timer_int.c > @@ -21,6 +21,7 @@ > #include "oprof.h" > > static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer); > +static int oprofile_hrtimer_started; Maybe we rename this in ctr_running. This is similar to the arch/x86 implementation. We don't need the oprofile_ prefix as this is static. 'started' is a bit irritating as we start the timer if the timer is started. > > static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer) > { > @@ -33,6 +34,9 @@ static void __oprofile_hrtimer_start(void *unused) > { > struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer); > > + if (!oprofile_hrtimer_started) > + return; > + > hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); > hrtimer->function = oprofile_hrtimer_notify; > > @@ -42,6 +46,7 @@ static void __oprofile_hrtimer_start(void *unused) > > static int oprofile_hrtimer_start(void) > { > + oprofile_hrtimer_started = 1; > on_each_cpu(__oprofile_hrtimer_start, NULL, 1); We must protect on_each_cpu() and the variable assignment with get/put_online_cpus(). See also implementation in arch/x86/oprofile/nmi_int.c. > return 0; > } > @@ -50,6 +55,9 @@ static void __oprofile_hrtimer_stop(int cpu) > { > struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu); > > + if (!oprofile_hrtimer_started) > + return; > + > hrtimer_cancel(hrtimer); > } > > @@ -59,6 +67,7 @@ static void oprofile_hrtimer_stop(void) > > for_each_online_cpu(cpu) > __oprofile_hrtimer_stop(cpu); > + oprofile_hrtimer_started = 0; Same here, protect this with get/put_online_cpus(). -Robert > } > > static int __cpuinit oprofile_cpu_notify(struct notifier_block *self, > -- > 1.6.0.4 > > -- Advanced Micro Devices, Inc. Operating System Research Center |
From: Shilimkar, S. <san...@ti...> - 2010-10-27 09:01:44
|
> -----Original Message----- > From: Robert Richter [mailto:rob...@am...] > Sent: Wednesday, October 27, 2010 1:37 PM > To: Shilimkar, Santosh > Cc: opr...@li...; lin...@vg...; R, Sricharan > Subject: Re: [PATCH] oprofile: Fix the hang while offline the cpu > > On 23.10.10 08:12:06, Santosh Shilimkar wrote: > > The kernel build with CONFIG_OPROFILE and CPU_HOTPLUG enabled. > > The oprofile is initialised using system timer in absence of hardware > > counters supports. Oprofile isn't started from userland. > > > > In this setup while doing a CPU offline the kernel hangs in infinite > > for loop inside lock_hrtimer_base() function > > > > This happens because as part of oprofile_cpu_notify(, it tries to > > stop an hrtimer which was never started. These per-cpu hrtimers > > are started when the oprfile is started. > > echo 1 > /dev/oprofile/enable > > Indeed, this is true. > > There is also the case vice versa, the cpu is booted with maxcpus > parameter set. When bringing the remaining cpus online the timers are > started even if oprofile is not yet enabled. > > > > > This patch fix this issue by adding a state variable so that > > these hrtimer start/stop is only attempted when oprofile is > > started > > > > Reported-by: Jan Sebastien <s-...@ti...> > > Signed-off-by: sricharan <r.s...@ti...> > > Tested-by: sricharan <r.s...@ti...> > > Signed-off-by: Santosh Shilimkar <san...@ti...> > > > > Cc: Robert Richter <rob...@am...> > > --- > > drivers/oprofile/timer_int.c | 9 +++++++++ > > 1 files changed, 9 insertions(+), 0 deletions(-) > > > > diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c > > index dc0ae4d..de487ba 100644 > > --- a/drivers/oprofile/timer_int.c > > +++ b/drivers/oprofile/timer_int.c > > @@ -21,6 +21,7 @@ > > #include "oprof.h" > > > > static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer); > > +static int oprofile_hrtimer_started; > > Maybe we rename this in ctr_running. This is similar to the arch/x86 > implementation. We don't need the oprofile_ prefix as this is static. > 'started' is a bit irritating as we start the timer if the timer is > started. > Will rename it to 'ctr_running' > > > > static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer > *hrtimer) > > { > > @@ -33,6 +34,9 @@ static void __oprofile_hrtimer_start(void *unused) > > { > > struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer); > > > > + if (!oprofile_hrtimer_started) > > + return; > > + > > hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); > > hrtimer->function = oprofile_hrtimer_notify; > > > > @@ -42,6 +46,7 @@ static void __oprofile_hrtimer_start(void *unused) > > > > static int oprofile_hrtimer_start(void) > > { > > + oprofile_hrtimer_started = 1; > > on_each_cpu(__oprofile_hrtimer_start, NULL, 1); > > We must protect on_each_cpu() and the variable assignment with > get/put_online_cpus(). See also implementation in > arch/x86/oprofile/nmi_int.c. > Good point. Will fix this. > > return 0; > > } > > @@ -50,6 +55,9 @@ static void __oprofile_hrtimer_stop(int cpu) > > { > > struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu); > > > > + if (!oprofile_hrtimer_started) > > + return; > > + > > hrtimer_cancel(hrtimer); > > } > > > > @@ -59,6 +67,7 @@ static void oprofile_hrtimer_stop(void) > > > > for_each_online_cpu(cpu) > > __oprofile_hrtimer_stop(cpu); > > + oprofile_hrtimer_started = 0; > > Same here, protect this with get/put_online_cpus(). Ok. Will send v2 with above fixes. Regards, Santosh |
From: Santosh S. <san...@ti...> - 2010-10-27 15:17:32
|
The kernel build with CONFIG_OPROFILE and CPU_HOTPLUG enabled. The oprofile is initialised using system timer in absence of hardware counters supports. Oprofile isn't started from userland. In this setup while doing a CPU offline the kernel hangs in infinite for loop inside lock_hrtimer_base() function This happens because as part of oprofile_cpu_notify(, it tries to stop an hrtimer which was never started. These per-cpu hrtimers are started when the oprfile is started. echo 1 > /dev/oprofile/enable This problem also existwhen the cpu is booted with maxcpus parameter set. When bringing the remaining cpus online the timers are started even if oprofile is not yet enabled. This patch fix this issue by adding a state variable so that these hrtimer start/stop is only attempted when oprofile is started Reported-by: Jan Sebastien <s-...@ti...> Signed-off-by: sricharan <r.s...@ti...> Tested-by: sricharan <r.s...@ti...> Cc: Robert Richter <rob...@am...> Signed-off-by: Santosh Shilimkar <san...@ti...> --- V2: Updated patch with comments from Robert Richter drivers/oprofile/timer_int.c | 13 +++++++++++++ 1 files changed, 13 insertions(+), 0 deletions(-) diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index dc0ae4d..0107251 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -21,6 +21,7 @@ #include "oprof.h" static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer); +static int ctr_running; static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer) { @@ -33,6 +34,9 @@ static void __oprofile_hrtimer_start(void *unused) { struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer); + if (!ctr_running) + return; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = oprofile_hrtimer_notify; @@ -42,7 +46,10 @@ static void __oprofile_hrtimer_start(void *unused) static int oprofile_hrtimer_start(void) { + get_online_cpus(); + ctr_running = 1; on_each_cpu(__oprofile_hrtimer_start, NULL, 1); + put_online_cpus(); return 0; } @@ -50,6 +57,9 @@ static void __oprofile_hrtimer_stop(int cpu) { struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu); + if (!ctr_running) + return; + hrtimer_cancel(hrtimer); } @@ -57,8 +67,11 @@ static void oprofile_hrtimer_stop(void) { int cpu; + get_online_cpus(); for_each_online_cpu(cpu) __oprofile_hrtimer_stop(cpu); + ctr_running = 0; + put_online_cpus(); } static int __cpuinit oprofile_cpu_notify(struct notifier_block *self, -- 1.6.0.4 |
From: Robert R. <rob...@am...> - 2010-10-28 00:13:15
|
On 27.10.10 11:17:15, Santosh Shilimkar wrote: > The kernel build with CONFIG_OPROFILE and CPU_HOTPLUG enabled. > The oprofile is initialised using system timer in absence of hardware > counters supports. Oprofile isn't started from userland. > > In this setup while doing a CPU offline the kernel hangs in infinite > for loop inside lock_hrtimer_base() function > > This happens because as part of oprofile_cpu_notify(, it tries to > stop an hrtimer which was never started. These per-cpu hrtimers > are started when the oprfile is started. > echo 1 > /dev/oprofile/enable > > This problem also existwhen the cpu is booted with maxcpus parameter > set. When bringing the remaining cpus online the timers are started > even if oprofile is not yet enabled. > > This patch fix this issue by adding a state variable so that > these hrtimer start/stop is only attempted when oprofile is > started > > Reported-by: Jan Sebastien <s-...@ti...> > Signed-off-by: sricharan <r.s...@ti...> > Tested-by: sricharan <r.s...@ti...> > Cc: Robert Richter <rob...@am...> > Signed-off-by: Santosh Shilimkar <san...@ti...> > --- > V2: Updated patch with comments from Robert Richter > drivers/oprofile/timer_int.c | 13 +++++++++++++ > 1 files changed, 13 insertions(+), 0 deletions(-) Applied to oprofile/urgent, thanks Santosh. -Robert -- Advanced Micro Devices, Inc. Operating System Research Center |