From: Li S. <sha...@in...> - 2005-04-12 05:33:41
|
Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use disable/enable_nonboot_cpus API. The S4 part is based on Pavel's original S4 SMP patch. Signed-off-by: Li Shaohua<sha...@in...> --- linux-2.6.11-root/drivers/acpi/Kconfig | 2 linux-2.6.11-root/include/linux/suspend.h | 2 linux-2.6.11-root/kernel/power/Kconfig | 2 linux-2.6.11-root/kernel/power/disk.c | 36 ++++++----- linux-2.6.11-root/kernel/power/main.c | 16 +++-- linux-2.6.11-root/kernel/power/smp.c | 91 +++++++++++------------------- linux-2.6.11-root/kernel/power/swsusp.c | 2 7 files changed, 69 insertions(+), 82 deletions(-) diff -puN drivers/acpi/Kconfig~smp_sleep drivers/acpi/Kconfig --- linux-2.6.11/drivers/acpi/Kconfig~smp_sleep 2005-04-12 11:11:14.884685080 +0800 +++ linux-2.6.11-root/drivers/acpi/Kconfig 2005-04-12 11:11:14.898682952 +0800 @@ -57,7 +57,7 @@ if ACPI_INTERPRETER config ACPI_SLEEP bool "Sleep States (EXPERIMENTAL)" - depends on X86 + depends on X86 && (!SMP || HOTPLUG_CPU) depends on EXPERIMENTAL default y ---help--- diff -puN include/linux/suspend.h~smp_sleep include/linux/suspend.h --- linux-2.6.11/include/linux/suspend.h~smp_sleep 2005-04-12 11:11:14.885684928 +0800 +++ linux-2.6.11-root/include/linux/suspend.h 2005-04-12 11:11:14.898682952 +0800 @@ -58,7 +58,7 @@ static inline int software_suspend(void) } #endif -#ifdef CONFIG_SMP +#ifdef CONFIG_HOTPLUG_CPU extern void disable_nonboot_cpus(void); extern void enable_nonboot_cpus(void); #else diff -puN kernel/power/disk.c~smp_sleep kernel/power/disk.c --- linux-2.6.11/kernel/power/disk.c~smp_sleep 2005-04-12 11:11:14.887684624 +0800 +++ linux-2.6.11-root/kernel/power/disk.c 2005-04-12 11:11:14.899682800 +0800 @@ -117,8 +117,8 @@ static void finish(void) { device_resume(); platform_finish(); - enable_nonboot_cpus(); thaw_processes(); + enable_nonboot_cpus(); pm_restore_console(); } @@ -131,28 +131,36 @@ static int prepare_processes(void) sys_sync(); + disable_nonboot_cpus(); + if (freeze_processes()) { error = -EBUSY; - return error; + goto enable_cpu; } if (pm_disk_mode == PM_DISK_PLATFORM) { if (pm_ops && pm_ops->prepare) { if ((error = pm_ops->prepare(PM_SUSPEND_DISK))) - return error; + goto thaw; } } /* Free memory before shutting down devices. */ free_some_memory(); - return 0; +thaw: + thaw_processes(); +enable_cpu: + enable_nonboot_cpus(); + pm_restore_console(); + return error; } static void unprepare_processes(void) { - enable_nonboot_cpus(); + platform_finish(); thaw_processes(); + enable_nonboot_cpus(); pm_restore_console(); } @@ -160,15 +168,9 @@ static int prepare_devices(void) { int error; - disable_nonboot_cpus(); - if ((error = device_suspend(PMSG_FREEZE))) { + if ((error = device_suspend(PMSG_FREEZE))) printk("Some devices failed to suspend\n"); - platform_finish(); - enable_nonboot_cpus(); - return error; - } - - return 0; + return error; } /** @@ -185,9 +187,9 @@ int pm_suspend_disk(void) int error; error = prepare_processes(); - if (!error) { - error = prepare_devices(); - } + if (error) + return error; + error = prepare_devices(); if (error) { unprepare_processes(); @@ -250,7 +252,7 @@ static int software_resume(void) if ((error = prepare_processes())) { swsusp_close(); - goto Cleanup; + goto Done; } pr_debug("PM: Reading swsusp image.\n"); diff -puN kernel/power/Kconfig~smp_sleep kernel/power/Kconfig --- linux-2.6.11/kernel/power/Kconfig~smp_sleep 2005-04-12 11:11:14.888684472 +0800 +++ linux-2.6.11-root/kernel/power/Kconfig 2005-04-12 11:11:14.899682800 +0800 @@ -28,7 +28,7 @@ config PM_DEBUG config SOFTWARE_SUSPEND bool "Software Suspend (EXPERIMENTAL)" - depends on EXPERIMENTAL && PM && SWAP + depends on EXPERIMENTAL && PM && SWAP && (HOTPLUG_CPU || !SMP) ---help--- Enable the possibility of suspending the machine. It doesn't need APM. diff -puN kernel/power/main.c~smp_sleep kernel/power/main.c --- linux-2.6.11/kernel/power/main.c~smp_sleep 2005-04-12 11:11:14.890684168 +0800 +++ linux-2.6.11-root/kernel/power/main.c 2005-04-12 11:11:14.899682800 +0800 @@ -59,6 +59,13 @@ static int suspend_prepare(suspend_state pm_prepare_console(); + disable_nonboot_cpus(); + + if (num_online_cpus() != 1) { + error = -EPERM; + goto Enable_cpu; + } + if (freeze_processes()) { error = -EAGAIN; goto Thaw; @@ -89,6 +96,8 @@ static int suspend_prepare(suspend_state pm_ops->finish(state); Thaw: thaw_processes(); + Enable_cpu: + enable_nonboot_cpus(); pm_restore_console(); return error; } @@ -127,6 +136,7 @@ static void suspend_finish(suspend_state if (pm_ops && pm_ops->finish) pm_ops->finish(state); thaw_processes(); + enable_nonboot_cpus(); pm_restore_console(); } @@ -164,12 +174,6 @@ static int enter_state(suspend_state_t s goto Unlock; } - /* Suspend is hard to get right on SMP. */ - if (num_online_cpus() != 1) { - error = -EPERM; - goto Unlock; - } - pr_debug("PM: Preparing system for suspend\n"); if ((error = suspend_prepare(state))) goto Unlock; diff -puN kernel/power/smp.c~smp_sleep kernel/power/smp.c --- linux-2.6.11/kernel/power/smp.c~smp_sleep 2005-04-12 11:11:14.891684016 +0800 +++ linux-2.6.11-root/kernel/power/smp.c 2005-04-12 11:11:14.899682800 +0800 @@ -13,73 +13,52 @@ #include <linux/interrupt.h> #include <linux/suspend.h> #include <linux/module.h> +#include <linux/cpu.h> #include <asm/atomic.h> #include <asm/tlbflush.h> -static atomic_t cpu_counter, freeze; - - -static void smp_pause(void * data) -{ - struct saved_context ctxt; - __save_processor_state(&ctxt); - printk("Sleeping in:\n"); - dump_stack(); - atomic_inc(&cpu_counter); - while (atomic_read(&freeze)) { - /* FIXME: restore takes place at random piece inside this. - This should probably be written in assembly, and - preserve general-purpose registers, too - - What about stack? We may need to move to new stack here. - - This should better be ran with interrupts disabled. - */ - cpu_relax(); - barrier(); - } - atomic_dec(&cpu_counter); - __restore_processor_state(&ctxt); -} - -static cpumask_t oldmask; +/* This is protected by pm_sem semaphore */ +static cpumask_t frozen_cpus; void disable_nonboot_cpus(void) { - oldmask = current->cpus_allowed; - set_cpus_allowed(current, cpumask_of_cpu(0)); - printk("Freezing CPUs (at %d)", _smp_processor_id()); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ); - printk("..."); - BUG_ON(_smp_processor_id() != 0); - - /* FIXME: for this to work, all the CPUs must be running - * "idle" thread (or we deadlock). Is that guaranteed? */ - - atomic_set(&cpu_counter, 0); - atomic_set(&freeze, 1); - smp_call_function(smp_pause, NULL, 0, 0); - while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) { - cpu_relax(); - barrier(); + int cpu, error; + + error = 0; + cpus_clear(frozen_cpus); + printk("Freezing cpus ...\n"); + for_each_online_cpu(cpu) { + if (cpu == 0) + continue; + error = cpu_down(cpu); + if (!error) { + cpu_set(cpu, frozen_cpus); + printk("CPU%d is down\n", cpu); + continue; + } + printk("Error taking cpu %d down: %d\n", cpu, error); } - printk("ok\n"); + BUG_ON(smp_processor_id() != 0); + if (error) + panic("cpus not sleeping"); } void enable_nonboot_cpus(void) { - printk("Restarting CPUs"); - atomic_set(&freeze, 0); - while (atomic_read(&cpu_counter)) { - cpu_relax(); - barrier(); - } - printk("..."); - set_cpus_allowed(current, oldmask); - schedule(); - printk("ok\n"); + int cpu, error; + printk("Thawing cpus ...\n"); + for_each_cpu_mask(cpu, frozen_cpus) { + error = smp_prepare_cpu(cpu); + if (!error) + error = cpu_up(cpu); + if (!error) { + printk("CPU%d is up\n", cpu); + continue; + } + printk("Error taking cpu %d up: %d\n", cpu, error); + panic("Not enough cpus"); + } + cpus_clear(frozen_cpus); } - diff -puN kernel/power/swsusp.c~smp_sleep kernel/power/swsusp.c --- linux-2.6.11/kernel/power/swsusp.c~smp_sleep 2005-04-12 11:11:14.892683864 +0800 +++ linux-2.6.11-root/kernel/power/swsusp.c 2005-04-12 11:11:14.900682648 +0800 @@ -1194,8 +1194,10 @@ static const char * sanity_check(void) return "version"; if (strcmp(swsusp_info.uts.machine,system_utsname.machine)) return "machine"; +#if 0 if(swsusp_info.cpus != num_online_cpus()) return "number of cpus"; +#endif return NULL; } _ |
From: Pavel M. <pa...@su...> - 2005-04-12 09:44:29
|
Hi! > Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use > disable/enable_nonboot_cpus API. The S4 part is based on Pavel's > original S4 SMP patch. The series looks good to me, but I was not yet able to actually try it. Will try to do that in few hours. Pavel -- Boycott Kodak -- for their patent abuse against Java. |
From: Pavel M. <pa...@uc...> - 2005-04-12 10:51:44
|
Hi! > Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use > disable/enable_nonboot_cpus API. The S4 part is based on Pavel's > original S4 SMP patch. I tested it on 2x PII(?) 550MHz system. Suspend went ok, resume loaded image from disk, but then I got Thawing cpus .... Booting processor 1/0 eip 3000 ...and very funny effect on keyboard leds. They started to blink (panic-like), but with very wrong frequency. It looked like 2 cpus doing panic blinks at once... Pavel -- Boycott Kodak -- for their patent abuse against Java. |
From: Li S. <sha...@in...> - 2005-04-12 12:42:58
|
On Tue, 2005-04-12 at 18:51, Pavel Machek wrote: > > Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use > > disable/enable_nonboot_cpus API. The S4 part is based on Pavel's > > original S4 SMP patch. > > I tested it on 2x PII(?) 550MHz system. Suspend went ok, resume loaded > image from disk, but then I got > > Thawing cpus .... > Booting processor 1/0 eip 3000 > > ...and very funny effect on keyboard leds. They started to blink > (panic-like), but with very wrong frequency. It looked like 2 cpus > doing panic blinks at once... Check if /sys/device/system/cpu/cpu1/online attribute works. If it works, then it's other issue. I only tested the patches in two HT based systems. Thanks, Shaohua |
From: Pavel M. <pa...@uc...> - 2005-04-13 08:32:34
|
Hi! > > > Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use > > > disable/enable_nonboot_cpus API. The S4 part is based on Pavel's > > > original S4 SMP patch. > > > > I tested it on 2x PII(?) 550MHz system. Suspend went ok, resume loaded > > image from disk, but then I got > > > > Thawing cpus .... > > Booting processor 1/0 eip 3000 > > > > ...and very funny effect on keyboard leds. They started to blink > > (panic-like), but with very wrong frequency. It looked like 2 cpus > > doing panic blinks at once... > Check if /sys/device/system/cpu/cpu1/online attribute works. If it > works, then it's other issue. I only tested the patches in two HT based > systems. Ok, this is PIII system 550MHz system: root@hobit:/home/pavel# cat /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 551.309 cache size : 512 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx fxsr sse bogomips : 1085.44 core id : 255 cpu cores : 1 processor : 1 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 551.309 cache size : 512 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx fxsr sse bogomips : 1097.72 core id : 255 cpu cores : 1 root@hobit:/home/pavel# Offlining CPU seems to work ok: root@hobit:/sys/devices/system/cpu/cpu1# cat online 1 root@hobit:/sys/devices/system/cpu/cpu1# echo 0 > online root@hobit:/sys/devices/system/cpu/cpu1# sync root@hobit:/sys/devices/system/cpu/cpu1# cat /proc/cpuinfo processor : 0 vendor_id : GenuineIntel cpu family : 6 model : 7 model name : Pentium III (Katmai) stepping : 3 cpu MHz : 551.309 cache size : 512 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 2 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 mmx fxsr sse bogomips : 1085.44 core id : 255 cpu cores : 1 root@hobit:/sys/devices/system/cpu/cpu1# Putting cpu back online, I get: Booting processor 1/0 eip 3000 on console, and mess in syslog. Seems like my cpu #1 panicked but cpu #0 just keeps going?! root@hobit:/sys/devices/system/cpu/cpu1# dmesg | tail -20 [<c02c758f>] vt_console_print+0x24f/0x260 [<c02c7340>] vt_console_print+0x0/0x260 [<c01247e7>] __call_console_drivers+0x57/0x60 [<c01248e0>] call_console_drivers+0x80/0x110 [<c0124d8e>] release_console_sem+0x4e/0xc0 [<c0124c12>] vprintk+0x192/0x240 [<c0528891>] preempt_schedule_irq+0x51/0x80 [<c02adeca>] acpi_processor_idle+0x0/0x265 [<c010325e>] need_resched+0x1f/0x21 [<c02adeca>] acpi_processor_idle+0x0/0x265 [<c0124a77>] printk+0x17/0x20 [<c010b583>] cpu_init+0x73/0x360 [<c0117bd6>] start_secondary+0x6/0x170 Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e 00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 <f3> a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89 <0>Kernel panic - not syncing: Attempted to kill the idle task! Stuck ?? Inquiring remote APIC #0... ... APIC #0 ID: 00000000 ... APIC #0 VERSION: 00040011 ... APIC #0 SPIV: 000000ff root@hobit:/sys/devices/system/cpu/cpu1# dmesg | tail -25 [<c011d001>] activate_task+0x1/0xa0 [<c011d128>] resched_task+0x68/0x90 [<c011d8ba>] try_to_wake_up+0x2aa/0x2f0 [<c025ed7a>] fbcon_cursor+0x19a/0x270 [<c02c4958>] hide_cursor+0x18/0x30 [<c02c758f>] vt_console_print+0x24f/0x260 [<c02c7340>] vt_console_print+0x0/0x260 [<c01247e7>] __call_console_drivers+0x57/0x60 [<c01248e0>] call_console_drivers+0x80/0x110 [<c0124d8e>] release_console_sem+0x4e/0xc0 [<c0124c12>] vprintk+0x192/0x240 [<c0528891>] preempt_schedule_irq+0x51/0x80 [<c02adeca>] acpi_processor_idle+0x0/0x265 [<c010325e>] need_resched+0x1f/0x21 [<c02adeca>] acpi_processor_idle+0x0/0x265 [<c0124a77>] printk+0x17/0x20 [<c010b583>] cpu_init+0x73/0x360 [<c0117bd6>] start_secondary+0x6/0x170 Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e 00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 <f3> a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89 <0>Kernel panic - not syncing: Attempted to kill the idle task! Stuck ?? Inquiring remote APIC #0... ... APIC #0 ID: 00000000 ... APIC #0 VERSION: 00040011 ... APIC #0 SPIV: 000000ff root@hobit:/sys/devices/system/cpu/cpu1# Pavel -- Boycott Kodak -- for their patent abuse against Java. |
From: Li S. <sha...@in...> - 2005-04-14 08:30:03
Attachments:
ht
|
On Wed, 2005-04-13 at 16:32, Pavel Machek wrote: > root@hobit:/sys/devices/system/cpu/cpu1# dmesg | tail -25 > [<c011d001>] activate_task+0x1/0xa0 > [<c011d128>] resched_task+0x68/0x90 > [<c011d8ba>] try_to_wake_up+0x2aa/0x2f0 > [<c025ed7a>] fbcon_cursor+0x19a/0x270 > [<c02c4958>] hide_cursor+0x18/0x30 > [<c02c758f>] vt_console_print+0x24f/0x260 > [<c02c7340>] vt_console_print+0x0/0x260 > [<c01247e7>] __call_console_drivers+0x57/0x60 > [<c01248e0>] call_console_drivers+0x80/0x110 > [<c0124d8e>] release_console_sem+0x4e/0xc0 > [<c0124c12>] vprintk+0x192/0x240 > [<c0528891>] preempt_schedule_irq+0x51/0x80 > [<c02adeca>] acpi_processor_idle+0x0/0x265 > [<c010325e>] need_resched+0x1f/0x21 > [<c02adeca>] acpi_processor_idle+0x0/0x265 > [<c0124a77>] printk+0x17/0x20 > [<c010b583>] cpu_init+0x73/0x360 > [<c0117bd6>] start_secondary+0x6/0x170 > Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e > 00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 <f3> > a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89 > <0>Kernel panic - not syncing: Attempted to kill the idle task! > Stuck ?? > Inquiring remote APIC #0... > ... APIC #0 ID: 00000000 > ... APIC #0 VERSION: 00040011 > ... APIC #0 SPIV: 000000ff > root@hobit:/sys/devices/system/cpu/cpu1# Andrew, Below patch fixed Pavel's oops. But strange is the 'system_state' check is added for CPU hotplug by Rusty. This really makes me confused. Could you please look at it. This can be reproduced 100% with radeonfb driver load. Attached is the dmesg of an oops. It seems the 'objp' parameter for 'cache_alloc_debugcheck_after' is invalid. Thanks, Shaohua --- a/kernel/printk.c 2005-04-12 10:12:19.000000000 +0800 +++ b/kernel/printk.c 2005-04-13 17:22:40.912897328 +0800 @@ -624,8 +624,7 @@ asmlinkage int vprintk(const char *fmt, log_level_unknown = 1; } - if (!cpu_online(smp_processor_id()) && - system_state != SYSTEM_RUNNING) { + if (!cpu_online(smp_processor_id())) { /* * Some console drivers may assume that per-cpu resources have * been allocated. So don't allow them to be called by this |
From: Li S. <sha...@in...> - 2005-04-14 08:42:58
|
On Thu, 2005-04-14 at 16:27, Li Shaohua wrote: > On Wed, 2005-04-13 at 16:32, Pavel Machek wrote: > > root@hobit:/sys/devices/system/cpu/cpu1# dmesg | tail -25 > > [<c011d001>] activate_task+0x1/0xa0 > > [<c011d128>] resched_task+0x68/0x90 > > [<c011d8ba>] try_to_wake_up+0x2aa/0x2f0 > > [<c025ed7a>] fbcon_cursor+0x19a/0x270 > > [<c02c4958>] hide_cursor+0x18/0x30 > > [<c02c758f>] vt_console_print+0x24f/0x260 > > [<c02c7340>] vt_console_print+0x0/0x260 > > [<c01247e7>] __call_console_drivers+0x57/0x60 > > [<c01248e0>] call_console_drivers+0x80/0x110 > > [<c0124d8e>] release_console_sem+0x4e/0xc0 > > [<c0124c12>] vprintk+0x192/0x240 > > [<c0528891>] preempt_schedule_irq+0x51/0x80 > > [<c02adeca>] acpi_processor_idle+0x0/0x265 > > [<c010325e>] need_resched+0x1f/0x21 > > [<c02adeca>] acpi_processor_idle+0x0/0x265 > > [<c0124a77>] printk+0x17/0x20 > > [<c010b583>] cpu_init+0x73/0x360 > > [<c0117bd6>] start_secondary+0x6/0x170 > > Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e > > 00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 <f3> > > a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89 > > <0>Kernel panic - not syncing: Attempted to kill the idle task! > > Stuck ?? > > Inquiring remote APIC #0... > > ... APIC #0 ID: 00000000 > > ... APIC #0 VERSION: 00040011 > > ... APIC #0 SPIV: 000000ff > > root@hobit:/sys/devices/system/cpu/cpu1# > Andrew, > Below patch fixed Pavel's oops. But strange is the 'system_state' check > is added for CPU hotplug by Rusty. This really makes me confused. Could > you please look at it. > This can be reproduced 100% with radeonfb driver load. Attached is the > dmesg of an oops. It seems the 'objp' parameter for > 'cache_alloc_debugcheck_after' is invalid. Looks the per-cpu array_cache isn't initialized. It's initialized in a cpuhotplug callback. So before the CPU call cpu_up, all kmalloc will failed. Isn't it? Thanks, Shaohua |
From: Andrew M. <ak...@os...> - 2005-04-23 22:35:30
|
Li Shaohua <sha...@in...> wrote: > > CPU0 attaching NULL sched-domain. > CPU1 attaching NULL sched-domain. > CPU0 attaching NULL sched-domain. > Booting processor 1/1 eip 3000 > Initializing CPU#1 > masked ExtINT on CPU#1 > Unable to handle kernel paging request at virtual address f000acb2 > printing eip: > c014e4cc > *pde = 00000000 > Oops: 0000 [#1] > PREEMPT SMP > Modules linked in: > CPU: 1 > EIP: 0060:[<c014e4cc>] Not tainted VLI > EFLAGS: 00010097 (2.6.12-rc2-mm3) > EIP is at check_poison_obj+0x4c/0x1e0 > eax: 0000006b ebx: 0000005a ecx: dff6e080 edx: dff6e480 > esi: 00000000 edi: f000acb2 ebp: 00000080 esp: c14fdcd4 > ds: 007b es: 007b ss: 0068 > Process swapper (pid: 0, threadinfo=c14fc000 task=dff42560) > Stack: dff6e480 5a5a5a5a 5a5a5a5a 0000007f 5a5a5a5a 00000000 0000005a f000acae > dff6e480 c021192e c0150031 dff6e480 f000acae 5a5a5a5a 5a5a5a5a dff6e480 > 00000046 00000020 00000010 c015044b dff6e480 00000020 f000acae c021192e > Call Trace: > [<c021192e>] soft_cursor+0x5e/0x260 > [<c0150031>] cache_alloc_debugcheck_after+0x181/0x1a0 > [<c015044b>] __kmalloc+0x9b/0xd0 > [<c021192e>] soft_cursor+0x5e/0x260 > [<c021192e>] soft_cursor+0x5e/0x260 > [<c020a459>] bit_cursor+0x339/0x540 > [<c0118998>] recalc_task_prio+0x88/0x150 > [<c0205c32>] fbcon_cursor+0x1a2/0x270 > [<c0265475>] hide_cursor+0x25/0x40 > [<c026843a>] vt_console_print+0x2aa/0x2b0 > [<c0120d32>] __call_console_drivers+0x62/0x70 > [<c0120e66>] call_console_drivers+0x96/0x130 > [<c0121361>] release_console_sem+0x51/0xc0 > [<c01211df>] vprintk+0x19f/0x250 > [<c01264b6>] __do_softirq+0xd6/0xf0 > [<c0438f5b>] preempt_schedule_irq+0x4b/0x80 > [<c0121037>] printk+0x17/0x20 > [<c0114132>] setup_local_APIC+0xe2/0x1d0 > [<c01130ba>] smp_callin+0x7a/0x120 > [<c011316e>] start_secondary+0xe/0x190 > Code: 24 30 89 14 24 01 c7 e8 13 f8 ff ff 39 44 24 14 89 c5 0f 8d b7 00 00 00 8d 40 ff 89 44 24 0c 3b 74 24 0c b0 6b 0f 84 8c 01 00 00 <38> 04 3e 74 46 8b 44 24 14 85 c0 0f 84 48 01 00 00 89 3c 24 83 > <0>Kernel panic - not syncing: Attempted to kill the idle task! > fbcon is trying to call kmalloc before the slab system is initialised. It would be best to fix that within fbcon. > > Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e > > 00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 <f3> > > a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89 > > <0>Kernel panic - not syncing: Attempted to kill the idle task! > > Stuck ?? > > Inquiring remote APIC #0... > > ... APIC #0 ID: 00000000 > > ... APIC #0 VERSION: 00040011 > > ... APIC #0 SPIV: 000000ff > > root@hobit:/sys/devices/system/cpu/cpu1# > Andrew, > Below patch fixed Pavel's oops. But strange is the 'system_state' check > is added for CPU hotplug by Rusty. This really makes me confused. Could > you please look at it. Well as the comment says, if this CPU isn't online yet, and if the system has not yet reached SYSTEM_RUNNING state then we bale out of the printk because this cpu's per-cpu resources may not yet be fully set up. > This can be reproduced 100% with radeonfb driver load. Attached is the > dmesg of an oops. It seems the 'objp' parameter for > 'cache_alloc_debugcheck_after' is invalid. > > --- a/kernel/printk.c 2005-04-12 10:12:19.000000000 +0800 > +++ b/kernel/printk.c 2005-04-13 17:22:40.912897328 +0800 > @@ -624,8 +624,7 @@ asmlinkage int vprintk(const char *fmt, > log_level_unknown = 1; > } > > - if (!cpu_online(smp_processor_id()) && > - system_state != SYSTEM_RUNNING) { > + if (!cpu_online(smp_processor_id())) { > /* > * Some console drivers may assume that per-cpu resources have > * been allocated. So don't allow them to be called by this > That being said, I don't see why your patch should fix the oops. The test (system_state != SYSTEM_RUNNING) should be returning true at this time. |
From: Pavel M. <pa...@uc...> - 2005-04-23 22:42:22
|
Hi! > > > Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e > > > 00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 <f3> > > > a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89 > > > <0>Kernel panic - not syncing: Attempted to kill the idle task! > > > Stuck ?? > > > Inquiring remote APIC #0... > > > ... APIC #0 ID: 00000000 > > > ... APIC #0 VERSION: 00040011 > > > ... APIC #0 SPIV: 000000ff > > > root@hobit:/sys/devices/system/cpu/cpu1# > > Andrew, > > Below patch fixed Pavel's oops. But strange is the 'system_state' check > > is added for CPU hotplug by Rusty. This really makes me confused. Could > > you please look at it. > > Well as the comment says, if this CPU isn't online yet, and if the system > has not yet reached SYSTEM_RUNNING state then we bale out of the printk > because this cpu's per-cpu resources may not yet be fully set up. > > > > This can be reproduced 100% with radeonfb driver load. Attached is the > > dmesg of an oops. It seems the 'objp' parameter for > > 'cache_alloc_debugcheck_after' is invalid. > > > > --- a/kernel/printk.c 2005-04-12 10:12:19.000000000 +0800 > > +++ b/kernel/printk.c 2005-04-13 17:22:40.912897328 +0800 > > @@ -624,8 +624,7 @@ asmlinkage int vprintk(const char *fmt, > > log_level_unknown = 1; > > } > > > > - if (!cpu_online(smp_processor_id()) && > > - system_state != SYSTEM_RUNNING) { > > + if (!cpu_online(smp_processor_id())) { > > /* > > * Some console drivers may assume that per-cpu resources have > > * been allocated. So don't allow them to be called by this > > > > That being said, I don't see why your patch should fix the oops. The test > (system_state != SYSTEM_RUNNING) should be returning true at this time. We were booting CPU on running system. system_state was definitely SYSTEM_RUNNING, but we were trying to bring CPU#1 online... Check is wrong. If cpu is not yet online, we may not printk on it... It does not really depend on system_state... Pavel -- Boycott Kodak -- for their patent abuse against Java. |
From: Li S. <sha...@in...> - 2005-04-25 00:52:24
|
On Sun, 2005-04-24 at 06:35, Andrew Morton wrote: > fbcon is trying to call kmalloc before the slab system is initialised. > It would be best to fix that within fbcon. This solves the problem too. > Well as the comment says, if this CPU isn't online yet, and if the system > has not yet reached SYSTEM_RUNNING state then we bale out of the printk > because this cpu's per-cpu resources may not yet be fully set up. After the CPU is offline (we are doing CPU hotplug). Its per-cpu resources like kmalloc also aren't initialized. This is in SYSTEM_RUNNING state too, so the system state doesn't matter. Removing the system state check seems better than fixing the fbcon driver to me. Thanks, Shaohua |
From: Andrew M. <ak...@os...> - 2005-04-28 07:23:26
|
Li Shaohua <sha...@in...> wrote: > > Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use > disable/enable_nonboot_cpus API. The S4 part is based on Pavel's > original S4 SMP patch. On ia64, with tiger_defconfig: kernel/built-in.o(.text+0x59e12): In function `suspend_prepare': : undefined reference to `disable_nonboot_cpus' kernel/built-in.o(.text+0x59e62): In function `suspend_prepare': : undefined reference to `enable_nonboot_cpus' kernel/built-in.o(.text+0x5a222): In function `suspend_finish': : undefined reference to `enable_nonboot_cpus' Signed-off-by: Andrew Morton <ak...@os...> --- include/linux/suspend.h | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) diff -puN include/linux/suspend.h~suspend-resume-smp-support-fix include/linux/suspend.h --- 25/include/linux/suspend.h~suspend-resume-smp-support-fix Thu Apr 28 15:12:21 2005 +++ 25-akpm/include/linux/suspend.h Thu Apr 28 15:13:13 2005 @@ -58,7 +58,7 @@ static inline int software_suspend(void) } #endif -#ifdef CONFIG_HOTPLUG_CPU +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SOFTWARE_SUSPEND) extern void disable_nonboot_cpus(void); extern void enable_nonboot_cpus(void); #else _ |
From: Li S. <sha...@in...> - 2005-04-28 07:31:09
|
On Thu, 2005-04-28 at 15:22, Andrew Morton wrote: > -#ifdef CONFIG_HOTPLUG_CPU > +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SOFTWARE_SUSPEND) ACPI_SLEEP also requires it. So it will be #if defined(CONFIG_HOTPLUG_CPU) && (defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)) Thanks, Shaohua |
From: Andrew M. <ak...@os...> - 2005-04-28 07:41:36
|
Li Shaohua <sha...@in...> wrote: > > On Thu, 2005-04-28 at 15:22, Andrew Morton wrote: > > -#ifdef CONFIG_HOTPLUG_CPU > > +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_SOFTWARE_SUSPEND) > ACPI_SLEEP also requires it. So it will be > #if defined(CONFIG_HOTPLUG_CPU) && (defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)) > But does setting CONFIG_ACPI_SLEEP cause kernel/power/smp.o to be actually compiled and linked? I don't think so? Anyway, please send a tested fix. |
From: Li S. <sha...@in...> - 2005-04-28 07:38:12
|
On Thu, 2005-04-28 at 15:22, Andrew Morton wrote: > > On ia64, with tiger_defconfig: > > kernel/built-in.o(.text+0x59e12): In function `suspend_prepare': > : undefined reference to `disable_nonboot_cpus' > kernel/built-in.o(.text+0x59e62): In function `suspend_prepare': > : undefined reference to `enable_nonboot_cpus' > kernel/built-in.o(.text+0x5a222): In function `suspend_finish': > : undefined reference to `enable_nonboot_cpus' Pavel, Could IA64 do software suspend? There possibly are other troubles here. Thanks, Shaohua |
From: Pavel M. <pa...@uc...> - 2005-04-28 07:42:26
|
Hi! > > On ia64, with tiger_defconfig: > > > > kernel/built-in.o(.text+0x59e12): In function `suspend_prepare': > > : undefined reference to `disable_nonboot_cpus' > > kernel/built-in.o(.text+0x59e62): In function `suspend_prepare': > > : undefined reference to `enable_nonboot_cpus' > > kernel/built-in.o(.text+0x5a222): In function `suspend_finish': > > : undefined reference to `enable_nonboot_cpus' > Pavel, > Could IA64 do software suspend? There possibly are other troubles here. Someone would have to write low-level support. Bring me ia64 notebook and I'll do it ;-)))))))))))))))))))). Pavel -- Boycott Kodak -- for their patent abuse against Java. |
From: Yu, L. <lum...@in...> - 2005-04-28 08:11:11
|
On Thursday 28 April 2005 15:42, Pavel Machek wrote: > Hi! > > > > On ia64, with tiger_defconfig: > > > > > > kernel/built-in.o(.text+0x59e12): In function `suspend_prepare': > > > : undefined reference to `disable_nonboot_cpus' > > > > > > kernel/built-in.o(.text+0x59e62): In function `suspend_prepare': > > > : undefined reference to `enable_nonboot_cpus' > > > > > > kernel/built-in.o(.text+0x5a222): In function `suspend_finish': > > > : undefined reference to `enable_nonboot_cpus' > > > > Pavel, > > Could IA64 do software suspend? There possibly are other troubles here. > > Someone would have to write low-level support. Bring me ia64 notebook > and I'll do it ;-)))))))))))))))))))). > =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0= =A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0= =A0=A0 =A0=A0=A0=A0=A0=A0=A0 Pavel I just checked DSDT on one ia64 box, it has S0, S4, S5 object. So, the platform should have sufficient support for sleep-to-disk. Thanks, Luming |
From: Pavel M. <pa...@uc...> - 2005-04-28 08:21:00
|
Hi! > > > > On ia64, with tiger_defconfig: > > > > > > > > kernel/built-in.o(.text+0x59e12): In function `suspend_prepare': > > > > : undefined reference to `disable_nonboot_cpus' > > > > > > > > kernel/built-in.o(.text+0x59e62): In function `suspend_prepare': > > > > : undefined reference to `enable_nonboot_cpus' > > > > > > > > kernel/built-in.o(.text+0x5a222): In function `suspend_finish': > > > > : undefined reference to `enable_nonboot_cpus' > > > > > > Pavel, > > > Could IA64 do software suspend? There possibly are other troubles here. > > > > Someone would have to write low-level support. Bring me ia64 notebook > > and I'll do it ;-)))))))))))))))))))). > > I just checked DSDT on one ia64 box, it has S0, S4, S5 object. > So, the platform should have sufficient support for sleep-to-disk. No arguments about that one... Something like arch/i386/power/{swsusp.S,cpu.c} needs to be written for ia64, then. I'm not goot at ia64 assembly (never done that), and do not have ia64 machine nearby, so I'm probably not doing that work. Pavel -- Boycott Kodak -- for their patent abuse against Java. |
From: Yu, L. <lum...@in...> - 2005-04-28 08:34:51
|
On Thursday 28 April 2005 16:20, Pavel Machek wrote: > Hi! > > > > > > On ia64, with tiger_defconfig: > > > > > > > > > > kernel/built-in.o(.text+0x59e12): In function `suspend_prepare': > > > > > : undefined reference to `disable_nonboot_cpus' > > > > > > > > > > kernel/built-in.o(.text+0x59e62): In function `suspend_prepare': > > > > > : undefined reference to `enable_nonboot_cpus' > > > > > > > > > > kernel/built-in.o(.text+0x5a222): In function `suspend_finish': > > > > > : undefined reference to `enable_nonboot_cpus' > > > > > > > > Pavel, > > > > Could IA64 do software suspend? There possibly are other troubles > > > > here. > > > > > > Someone would have to write low-level support. Bring me ia64 notebook > > > and I'll do it ;-)))))))))))))))))))). > > > > I just checked DSDT on one ia64 box, it has S0, S4, S5 object. > > So, the platform should have sufficient support for sleep-to-disk. > > No arguments about that one... Something like > arch/i386/power/{swsusp.S,cpu.c} needs to be written for ia64, > then. I'm not goot at ia64 assembly (never done that), and do not have > ia64 machine nearby, so I'm probably not doing that work. > > Pavel It sounds not a big deal just to rewrite this two files. Let me try. :-) Thanks, Luming |
From: Li S. <sha...@in...> - 2005-04-28 08:32:11
|
On Thu, 2005-04-28 at 15:41, Andrew Morton wrote: > > But does setting CONFIG_ACPI_SLEEP cause kernel/power/smp.o to be actually > compiled and linked? I don't think so? > > Anyway, please send a tested fix. Ha, this one should be ok. Only IA32 support SMP suspend now. Thanks, Shaohua --- linux-2.6.11-root/drivers/acpi/Kconfig | 2 +- linux-2.6.11-root/include/linux/suspend.h | 2 +- linux-2.6.11-root/kernel/power/Kconfig | 6 +++++- linux-2.6.11-root/kernel/power/Makefile | 6 +++--- 4 files changed, 10 insertions(+), 6 deletions(-) diff -puN kernel/power/Makefile~kconfig kernel/power/Makefile --- linux-2.6.11/kernel/power/Makefile~kconfig 2005-04-28 15:47:44.862281448 +0800 +++ linux-2.6.11-root/kernel/power/Makefile 2005-04-28 16:12:04.285415408 +0800 @@ -3,9 +3,9 @@ ifeq ($(CONFIG_PM_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif -swsusp-smp-$(CONFIG_SMP) += smp.o - obj-y := main.o process.o console.o pm.o -obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o + +obj-$(CONFIG_SUSPEND_SMP) += smp.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff -puN kernel/power/Kconfig~kconfig kernel/power/Kconfig --- linux-2.6.11/kernel/power/Kconfig~kconfig 2005-04-28 15:48:45.195109464 +0800 +++ linux-2.6.11-root/kernel/power/Kconfig 2005-04-28 16:24:22.821140912 +0800 @@ -28,7 +28,7 @@ config PM_DEBUG config SOFTWARE_SUSPEND bool "Software Suspend (EXPERIMENTAL)" - depends on EXPERIMENTAL && PM && SWAP && (HOTPLUG_CPU || !SMP) + depends on EXPERIMENTAL && PM && SWAP && (SUSPEND_SMP || !SMP) ---help--- Enable the possibility of suspending the machine. It doesn't need APM. @@ -72,3 +72,7 @@ config PM_STD_PARTITION suspended image to. It will simply pick the first available swap device. +config SUSPEND_SMP + bool + depends on HOTPLUG_CPU && X86 && PM + default y diff -puN drivers/acpi/Kconfig~kconfig drivers/acpi/Kconfig --- linux-2.6.11/drivers/acpi/Kconfig~kconfig 2005-04-28 15:52:38.148695136 +0800 +++ linux-2.6.11-root/drivers/acpi/Kconfig 2005-04-28 15:53:01.707113712 +0800 @@ -57,7 +57,7 @@ if ACPI_INTERPRETER config ACPI_SLEEP bool "Sleep States (EXPERIMENTAL)" - depends on X86 && (!SMP || HOTPLUG_CPU) + depends on X86 && (!SMP || SUSPEND_SMP) depends on EXPERIMENTAL default y ---help--- diff -puN include/linux/suspend.h~kconfig include/linux/suspend.h --- linux-2.6.11/include/linux/suspend.h~kconfig 2005-04-28 15:53:50.261732288 +0800 +++ linux-2.6.11-root/include/linux/suspend.h 2005-04-28 15:54:12.928286448 +0800 @@ -58,7 +58,7 @@ static inline int software_suspend(void) } #endif -#ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_SUSPEND_SMP extern void disable_nonboot_cpus(void); extern void enable_nonboot_cpus(void); #else _ |
From: Pavel M. <pa...@su...> - 2005-04-28 08:35:19
|
Hi! > > But does setting CONFIG_ACPI_SLEEP cause kernel/power/smp.o to be actually > > compiled and linked? I don't think so? > > > > Anyway, please send a tested fix. > Ha, this one should be ok. Only IA32 support SMP suspend now. Seems okay... Rafael, what is status of x86-64 smp swsusp? Pavel -- Boycott Kodak -- for their patent abuse against Java. |
From: Rafael J. W. <rj...@si...> - 2005-04-28 09:04:39
|
Hi, On Thursday, 28 of April 2005 10:35, Pavel Machek wrote: > Hi! > > > > But does setting CONFIG_ACPI_SLEEP cause kernel/power/smp.o to be actually > > > compiled and linked? I don't think so? > > > > > > Anyway, please send a tested fix. > > Ha, this one should be ok. Only IA32 support SMP suspend now. > > Seems okay... Rafael, what is status of x86-64 smp swsusp? Preliminary, semi-working. ;-) Actually, I'm waiting for the i386 CPU hotplug to settle down a bit to see how much of it I can use on x86-64. Greets, Rafael -- - Would you tell me, please, which way I ought to go from here? - That depends a good deal on where you want to get to. -- Lewis Carroll "Alice's Adventures in Wonderland" |