From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:07
Hey folks,

Here's a new spin of the pvops64 patch series. We didn't get that many
comments last time, so it is probably almost ready to go in.

From the last version, the most notable changes are:

* consolidation of system.h, merging in Jeremy's comments about ordering
  concerns
* consolidation of the smp functions that go through smp_ops; they share
  a bunch of code now

Other than that, just some issues that arose from the rebase.

Please note that this patch series _does not_ apply over Linus' git
anymore, but rather over tglx's cleanup series. The first patch in this
series is already in Linus' tree, but not in tglx's, so I'm sending it
again; you'll need it if you want to compile the series anyway.

tglx, in the absence of any outstanding NACKs, or any major call for
improvements, could you please pull it into your tree?

Have fun,
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:15
mm/sparse-vmemmap.c uses init_mm in some places. However, it is not present
in any of the headers currently included in the file. init_mm is declared
as extern in sched.h, so we add that header to the include list.

Up to now, this problem was masked by the fact that functions like
set_pte_at() and pmd_populate_kernel() are usually macros that expand to
simpler variants that do not use the first parameter at all.

Signed-off-by: Glauber de Oliveira Costa <gc...@re...>
Signed-off-by: Andrew Morton <ak...@li...>
Signed-off-by: Linus Torvalds <tor...@li...>
---
 mm/sparse-vmemmap.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index d3b718b..22620f6 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
+#include <linux/sched.h>
 #include <asm/dma.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
--
1.4.4.2
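The masking effect the changelog describes is easy to reproduce outside the
kernel. Below is a minimal, hypothetical user-space sketch -- the names only
mimic set_pte_at()/init_mm, none of this is kernel code -- showing how a
macro that discards its mm argument at preprocessing time hides a missing
declaration, while a variant that forwards the argument exposes it.

/*
 * Illustrative stand-ins only: when the macro drops its mm argument,
 * the compiler never sees init_mm, so a missing declaration goes
 * unnoticed until a variant that really uses the argument is built.
 */
#include <stdio.h>

struct mm_struct { int dummy; };
extern struct mm_struct init_mm;       /* analogous to the extern in <linux/sched.h> */

static void set_pte(int *ptep, int pte)                          { *ptep = pte; }
static void set_pte_mm(struct mm_struct *mm, int *ptep, int pte) { (void)mm; *ptep = pte; }

/* Variant A: mm is discarded by the preprocessor; init_mm need not be declared. */
#define set_pte_at_masked(mm, addr, ptep, pte)  set_pte((ptep), (pte))

/* Variant B: mm is actually forwarded, so the extern above is required. */
#define set_pte_at_real(mm, addr, ptep, pte)    set_pte_mm((mm), (ptep), (pte))

struct mm_struct init_mm;              /* defined elsewhere in the real kernel */

int main(void)
{
	int pte = 0;
	set_pte_at_masked(&init_mm, 0, &pte, 1);  /* would compile even without the extern */
	set_pte_at_real(&init_mm, 0, &pte, 2);    /* breaks without the declaration */
	printf("pte = %d\n", pte);
	return 0;
}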
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:27:51
This patch consolidates the irqflags include files containing common paravirt definitions. The native definition for interrupt handling, halt, and such, are the same for 32 and 64 bit, and they are kept in irqflags.h. The differences are split in the arch-specific files. The syscall function, irq_enable_sysexit, has a very specific i386 naming, and its name is then changed to a more general one. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/kernel/asm-offsets_32.c | 2 +- arch/x86/kernel/entry_32.S | 8 +- arch/x86/kernel/paravirt_32.c | 10 +- arch/x86/kernel/vmi_32.c | 4 +- arch/x86/xen/enlighten.c | 2 +- include/asm-x86/irqflags.h | 246 +++++++++++++++++++++++++++++++++++++- include/asm-x86/irqflags_32.h | 174 --------------------------- include/asm-x86/irqflags_64.h | 154 ------------------------ include/asm-x86/paravirt.h | 9 +- 9 files changed, 261 insertions(+), 348 deletions(-) diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 0e45981..c1ccfab 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -123,7 +123,7 @@ void foo(void) OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_CPU_iret, pv_cpu_ops, iret); - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); + OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret); OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); #endif diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index dc7f938..d63609d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -58,7 +58,7 @@ * for paravirtualization. The following will never clobber any registers: * INTERRUPT_RETURN (aka. "iret") * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax") - * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit"). + * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit"). * * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY). 
@@ -351,7 +351,7 @@ sysenter_past_esp: xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs - ENABLE_INTERRUPTS_SYSEXIT + ENABLE_INTERRUPTS_SYSCALL_RET CFI_ENDPROC .pushsection .fixup,"ax" 2: movl $0,PT_FS(%esp) @@ -882,10 +882,10 @@ ENTRY(native_iret) .previous END(native_iret) -ENTRY(native_irq_enable_sysexit) +ENTRY(native_irq_enable_syscall_ret) sti sysexit -END(native_irq_enable_sysexit) +END(native_irq_enable_syscall_ret) #endif KPROBE_ENTRY(int3) diff --git a/arch/x86/kernel/paravirt_32.c b/arch/x86/kernel/paravirt_32.c index 6a80d67..04f51d0 100644 --- a/arch/x86/kernel/paravirt_32.c +++ b/arch/x86/kernel/paravirt_32.c @@ -60,7 +60,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf"); DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax"); DEF_NATIVE(pv_cpu_ops, iret, "iret"); -DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit"); +DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit"); DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3"); DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax"); @@ -88,7 +88,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf, SITE(pv_irq_ops, restore_fl); SITE(pv_irq_ops, save_fl); SITE(pv_cpu_ops, iret); - SITE(pv_cpu_ops, irq_enable_sysexit); + SITE(pv_cpu_ops, irq_enable_syscall_ret); SITE(pv_mmu_ops, read_cr2); SITE(pv_mmu_ops, read_cr3); SITE(pv_mmu_ops, write_cr3); @@ -186,7 +186,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf, /* If the operation is a nop, then nop the callsite */ ret = paravirt_patch_nop(); else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || - type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit)) + type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret)) /* If operation requires a jmp, then jmp */ ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); else @@ -237,7 +237,7 @@ static void native_flush_tlb_single(unsigned long addr) /* These are in entry.S */ extern void native_iret(void); -extern void native_irq_enable_sysexit(void); +extern void native_irq_enable_syscall_ret(void); static int __init print_banner(void) { @@ -384,7 +384,7 @@ struct pv_cpu_ops pv_cpu_ops = { .write_idt_entry = write_dt_entry, .load_esp0 = native_load_esp0, - .irq_enable_sysexit = native_irq_enable_sysexit, + .irq_enable_syscall_ret = native_irq_enable_syscall_ret, .iret = native_iret, .set_iopl_mask = native_set_iopl_mask, diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index f02bad6..aacce42 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -148,7 +148,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, insns, eip); case PARAVIRT_PATCH(pv_cpu_ops.iret): return patch_internal(VMI_CALL_IRET, len, insns, eip); - case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit): + case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret): return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip); default: break; @@ -870,7 +870,7 @@ static inline int __init activate_vmi(void) * the backend. They are performance critical anyway, so requiring * a patch is not a big problem. 
*/ - pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0; + pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0; pv_cpu_ops.iret = (void *)0xbadbab0; #ifdef CONFIG_SMP diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 94c39aa..094b915 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -953,7 +953,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .read_pmc = native_read_pmc, .iret = (void *)&hypercall_page[__HYPERVISOR_iret], - .irq_enable_sysexit = NULL, /* never called */ + .irq_enable_syscall_ret = NULL, /* never called */ .load_tr_desc = paravirt_nop, .set_ldt = xen_set_ldt, diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h index 1b695ff..92021c1 100644 --- a/include/asm-x86/irqflags.h +++ b/include/asm-x86/irqflags.h @@ -1,5 +1,245 @@ -#ifdef CONFIG_X86_32 -# include "irqflags_32.h" +#ifndef _X86_IRQFLAGS_H_ +#define _X86_IRQFLAGS_H_ + +#include <asm/processor-flags.h> + +#ifndef __ASSEMBLY__ +/* + * Interrupt control: + */ + +static inline unsigned long native_save_fl(void) +{ + unsigned long flags; + + __asm__ __volatile__( + "# __raw_save_flags\n\t" + "pushf ; pop %0" + : "=g" (flags) + : /* no input */ + : "memory" + ); + + return flags; +} + +static inline void native_restore_fl(unsigned long flags) +{ + __asm__ __volatile__( + "push %0 ; popf" + : /* no output */ + :"g" (flags) + :"memory", "cc" + ); +} + +static inline void native_irq_disable(void) +{ + asm volatile("cli": : :"memory"); +} + +static inline void native_irq_enable(void) +{ + asm volatile("sti": : :"memory"); +} + +static inline void native_safe_halt(void) +{ + asm volatile("sti; hlt": : :"memory"); +} + +static inline void native_halt(void) +{ + asm volatile("hlt": : :"memory"); +} + +#endif + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#ifndef __ASSEMBLY__ + +static inline unsigned long __raw_local_save_flags(void) +{ + return native_save_fl(); +} + +static inline void raw_local_irq_restore(unsigned long flags) +{ + native_restore_fl(flags); +} + +static inline void raw_local_irq_disable(void) +{ + native_irq_disable(); +} + +static inline void raw_local_irq_enable(void) +{ + native_irq_enable(); +} + +/* + * Used in the idle loop; sti takes one instruction cycle + * to complete: + */ +static inline void raw_safe_halt(void) +{ + native_safe_halt(); +} + +/* + * Used when interrupts are already enabled or to + * shutdown the processor: + */ +static inline void halt(void) +{ + native_halt(); +} + +/* + * For spinlocks, etc: + */ +static inline unsigned long __raw_local_irq_save(void) +{ + unsigned long flags = __raw_local_save_flags(); + + raw_local_irq_disable(); + + return flags; +} +#else + +#define ENABLE_INTERRUPTS(x) sti +#define DISABLE_INTERRUPTS(x) cli + +#ifdef CONFIG_X86_64 +#define INTERRUPT_RETURN iretq +#define ENABLE_INTERRUPTS_SYSCALL_RET \ + movq %gs:pda_oldrsp, %rsp; \ + swapgs; \ + sysretq; +#else +#define INTERRUPT_RETURN iret +#define ENABLE_INTERRUPTS_SYSCALL_RET sti; sysexit +#define GET_CR0_INTO_EAX movl %cr0, %eax +#endif + + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_PARAVIRT */ + +#ifndef __ASSEMBLY__ +#define raw_local_save_flags(flags) \ + do { (flags) = __raw_local_save_flags(); } while (0) + +#define raw_local_irq_save(flags) \ + do { (flags) = __raw_local_irq_save(); } while (0) + +static inline int raw_irqs_disabled_flags(unsigned long flags) +{ + return !(flags & X86_EFLAGS_IF); +} + +static inline int raw_irqs_disabled(void) +{ + unsigned long flags = __raw_local_save_flags(); + + return 
raw_irqs_disabled_flags(flags); +} + +/* + * makes the traced hardirq state match with the machine state + * + * should be a rarely used function, only in places where its + * otherwise impossible to know the irq state, like in traps. + */ +static inline void trace_hardirqs_fixup_flags(unsigned long flags) +{ + if (raw_irqs_disabled_flags(flags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +} + +static inline void trace_hardirqs_fixup(void) +{ + unsigned long flags = __raw_local_save_flags(); + + trace_hardirqs_fixup_flags(flags); +} + #else -# include "irqflags_64.h" + +#ifdef CONFIG_X86_64 +/* + * Currently paravirt can't handle swapgs nicely when we + * don't have a stack we can rely on (such as a user space + * stack). So we either find a way around these or just fault + * and emulate if a guest tries to call swapgs directly. + * + * Either way, this is a good way to document that we don't + * have a reliable stack. x86_64 only. + */ +#define SWAPGS_UNSAFE_STACK swapgs +#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk +#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk +#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk +#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ + TRACE_IRQS_ON; \ + sti; \ + SAVE_REST; \ + LOCKDEP_SYS_EXIT; \ + RESTORE_REST; \ + cli; \ + TRACE_IRQS_OFF; + +#else +#define ARCH_TRACE_IRQS_ON \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_on; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_TRACE_IRQS_OFF \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call trace_hardirqs_off; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_LOCKDEP_SYS_EXIT \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call lockdep_sys_exit; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + +#define ARCH_LOCKDEP_SYS_EXIT_IRQ +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON +# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ +# else +# define LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ +# endif + +#endif /* __ASSEMBLY__ */ #endif diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h deleted file mode 100644 index d8500f4..0000000 --- a/include/asm-x86/irqflags_32.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * IRQ flags handling - * - * This file gets included from lowlevel asm headers too, to provide - * wrapped versions of the local_irq_*() APIs, based on the - * raw_local_irq_*() functions from the lowlevel headers. 
- */ -#ifndef _ASM_IRQFLAGS_H -#define _ASM_IRQFLAGS_H -#include <asm/processor-flags.h> - -#ifndef __ASSEMBLY__ -static inline unsigned long native_save_fl(void) -{ - unsigned long f; - asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); - return f; -} - -static inline void native_restore_fl(unsigned long f) -{ - asm volatile("pushl %0 ; popfl": /* no output */ - :"g" (f) - :"memory", "cc"); -} - -static inline void native_irq_disable(void) -{ - asm volatile("cli": : :"memory"); -} - -static inline void native_irq_enable(void) -{ - asm volatile("sti": : :"memory"); -} - -static inline void native_safe_halt(void) -{ - asm volatile("sti; hlt": : :"memory"); -} - -static inline void native_halt(void) -{ - asm volatile("hlt": : :"memory"); -} -#endif /* __ASSEMBLY__ */ - -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#ifndef __ASSEMBLY__ - -static inline unsigned long __raw_local_save_flags(void) -{ - return native_save_fl(); -} - -static inline void raw_local_irq_restore(unsigned long flags) -{ - native_restore_fl(flags); -} - -static inline void raw_local_irq_disable(void) -{ - native_irq_disable(); -} - -static inline void raw_local_irq_enable(void) -{ - native_irq_enable(); -} - -/* - * Used in the idle loop; sti takes one instruction cycle - * to complete: - */ -static inline void raw_safe_halt(void) -{ - native_safe_halt(); -} - -/* - * Used when interrupts are already enabled or to - * shutdown the processor: - */ -static inline void halt(void) -{ - native_halt(); -} - -/* - * For spinlocks, etc: - */ -static inline unsigned long __raw_local_irq_save(void) -{ - unsigned long flags = __raw_local_save_flags(); - - raw_local_irq_disable(); - - return flags; -} - -#else -#define DISABLE_INTERRUPTS(clobbers) cli -#define ENABLE_INTERRUPTS(clobbers) sti -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit -#define INTERRUPT_RETURN iret -#define GET_CR0_INTO_EAX movl %cr0, %eax -#endif /* __ASSEMBLY__ */ -#endif /* CONFIG_PARAVIRT */ - -#ifndef __ASSEMBLY__ -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - -#define raw_local_irq_save(flags) \ - do { (flags) = __raw_local_irq_save(); } while (0) - -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & X86_EFLAGS_IF); -} - -static inline int raw_irqs_disabled(void) -{ - unsigned long flags = __raw_local_save_flags(); - - return raw_irqs_disabled_flags(flags); -} -#endif /* __ASSEMBLY__ */ - -/* - * Do the CPU's IRQ-state tracing from assembly code. 
We call a - * C function, so save all the C-clobbered registers: - */ -#ifdef CONFIG_TRACE_IRQFLAGS - -# define TRACE_IRQS_ON \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_on; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - -# define TRACE_IRQS_OFF \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_off; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - -#else -# define TRACE_IRQS_ON -# define TRACE_IRQS_OFF -#endif - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LOCKDEP_SYS_EXIT \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call lockdep_sys_exit; \ - popl %edx; \ - popl %ecx; \ - popl %eax; -#else -# define LOCKDEP_SYS_EXIT -#endif - -#endif diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h deleted file mode 100644 index 857d9eb..0000000 --- a/include/asm-x86/irqflags_64.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * IRQ flags handling - * - * This file gets included from lowlevel asm headers too, to provide - * wrapped versions of the local_irq_*() APIs, based on the - * raw_local_irq_*() functions from the lowlevel headers. - */ -#ifndef _ASM_IRQFLAGS_H -#define _ASM_IRQFLAGS_H -#include <asm/processor-flags.h> - -#ifndef __ASSEMBLY__ -/* - * Interrupt control: - */ - -static inline unsigned long __raw_local_save_flags(void) -{ - unsigned long flags; - - __asm__ __volatile__( - "# __raw_save_flags\n\t" - "pushfq ; popq %q0" - : "=g" (flags) - : /* no input */ - : "memory" - ); - - return flags; -} - -#define raw_local_save_flags(flags) \ - do { (flags) = __raw_local_save_flags(); } while (0) - -static inline void raw_local_irq_restore(unsigned long flags) -{ - __asm__ __volatile__( - "pushq %0 ; popfq" - : /* no output */ - :"g" (flags) - :"memory", "cc" - ); -} - -#ifdef CONFIG_X86_VSMP - -/* - * Interrupt control for the VSMP architecture: - */ - -static inline void raw_local_irq_disable(void) -{ - unsigned long flags = __raw_local_save_flags(); - - raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); -} - -static inline void raw_local_irq_enable(void) -{ - unsigned long flags = __raw_local_save_flags(); - - raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); -} - -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC); -} - -#else /* CONFIG_X86_VSMP */ - -static inline void raw_local_irq_disable(void) -{ - __asm__ __volatile__("cli" : : : "memory"); -} - -static inline void raw_local_irq_enable(void) -{ - __asm__ __volatile__("sti" : : : "memory"); -} - -static inline int raw_irqs_disabled_flags(unsigned long flags) -{ - return !(flags & X86_EFLAGS_IF); -} - -#endif - -/* - * For spinlocks, etc.: - */ - -static inline unsigned long __raw_local_irq_save(void) -{ - unsigned long flags = __raw_local_save_flags(); - - raw_local_irq_disable(); - - return flags; -} - -#define raw_local_irq_save(flags) \ - do { (flags) = __raw_local_irq_save(); } while (0) - -static inline int raw_irqs_disabled(void) -{ - unsigned long flags = __raw_local_save_flags(); - - return raw_irqs_disabled_flags(flags); -} - -/* - * Used in the idle loop; sti takes one instruction cycle - * to complete: - */ -static inline void raw_safe_halt(void) -{ - __asm__ __volatile__("sti; hlt" : : : "memory"); -} - -/* - * Used when interrupts are already enabled or to - * shutdown the processor: - */ -static inline void halt(void) -{ - __asm__ __volatile__("hlt": : :"memory"); -} - -#else /* __ASSEMBLY__: */ -# ifdef CONFIG_TRACE_IRQFLAGS -# define TRACE_IRQS_ON 
call trace_hardirqs_on_thunk -# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk -# else -# define TRACE_IRQS_ON -# define TRACE_IRQS_OFF -# endif -# ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk -# define LOCKDEP_SYS_EXIT_IRQ \ - TRACE_IRQS_ON; \ - sti; \ - SAVE_REST; \ - LOCKDEP_SYS_EXIT; \ - RESTORE_REST; \ - cli; \ - TRACE_IRQS_OFF; -# else -# define LOCKDEP_SYS_EXIT -# define LOCKDEP_SYS_EXIT_IRQ -# endif -#endif - -#endif diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index f59d370..d81a361 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -121,7 +121,7 @@ struct pv_cpu_ops { u64 (*read_pmc)(void); /* These two are jmp to, not actually called. */ - void (*irq_enable_sysexit)(void); + void (*irq_enable_syscall_ret)(void); void (*iret)(void); struct pv_lazy_ops lazy_mode; @@ -1138,9 +1138,10 @@ static inline unsigned long __raw_local_irq_save(void) call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \ popl %edx; popl %ecx; popl %eax) -#define ENABLE_INTERRUPTS_SYSEXIT \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\ - jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit) +#define ENABLE_INTERRUPTS_SYSCALL_RET \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\ + CLBR_NONE, \ + jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_syscall_ret) #define GET_CR0_INTO_EAX \ push %ecx; push %edx; \ -- 1.4.4.2 |
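For readers following the header shuffle above, here is a small user-space
model of the layering the unified irqflags.h keeps. It is a sketch under
stated assumptions, not kernel code: fake_eflags stands in for the real
flags register, the inline asm primitives are replaced by plain C, and only
three of the pv_irq_ops hooks are shown.

#include <stdio.h>

#define X86_EFLAGS_IF 0x200UL            /* interrupt-enable flag, as in processor-flags.h */

static unsigned long fake_eflags = X86_EFLAGS_IF;   /* stand-in for the hardware register */

/* "native" primitives: in the kernel these are pushf/pop, popf, cli. */
static unsigned long native_save_fl(void)              { return fake_eflags; }
static void native_restore_fl(unsigned long f)          { fake_eflags = f; }
static void native_irq_disable(void)                    { fake_eflags &= ~X86_EFLAGS_IF; }

/* pv_irq_ops-style indirection; a hypervisor backend could repoint these. */
struct pv_irq_ops {
	unsigned long (*save_fl)(void);
	void (*restore_fl)(unsigned long);
	void (*irq_disable)(void);
} pv_irq_ops = { native_save_fl, native_restore_fl, native_irq_disable };

/* raw_* helpers built on top, as in the consolidated header. */
static unsigned long __raw_local_irq_save(void)
{
	unsigned long flags = pv_irq_ops.save_fl();
	pv_irq_ops.irq_disable();
	return flags;
}

static int raw_irqs_disabled_flags(unsigned long flags)
{
	return !(flags & X86_EFLAGS_IF);
}

int main(void)
{
	unsigned long flags = __raw_local_irq_save();
	printf("irqs disabled inside critical section: %d\n",
	       raw_irqs_disabled_flags(pv_irq_ops.save_fl()));
	pv_irq_ops.restore_fl(flags);
	printf("irqs disabled after restore: %d\n",
	       raw_irqs_disabled_flags(pv_irq_ops.save_fl()));
	return 0;
}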
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:43
The cli and sti instructions need to be replaced by paravirt hooks. For the i386 architecture, this is already done. The code requirements aren't much different from x86_64 POV, so this part is consolidated in the common header Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- include/asm-x86/spinlock.h | 14 ++++++++++++++ include/asm-x86/spinlock_32.h | 9 --------- include/asm-x86/spinlock_64.h | 8 +++++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index d74d85e..e1d555a 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h @@ -1,5 +1,19 @@ +#ifndef _X86_SPINLOCK_H_ +#define _X86_SPINLOCK_H_ + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define CLI_STRING "cli" +#define STI_STRING "sti" +#define CLI_STI_CLOBBERS +#define CLI_STI_INPUT_ARGS +#endif /* CONFIG_PARAVIRT */ + #ifdef CONFIG_X86_32 # include "spinlock_32.h" #else # include "spinlock_64.h" #endif + +#endif diff --git a/include/asm-x86/spinlock_32.h b/include/asm-x86/spinlock_32.h index d3bcebe..ebbf371 100644 --- a/include/asm-x86/spinlock_32.h +++ b/include/asm-x86/spinlock_32.h @@ -7,15 +7,6 @@ #include <asm/processor.h> #include <linux/compiler.h> -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define CLI_STRING "cli" -#define STI_STRING "sti" -#define CLI_STI_CLOBBERS -#define CLI_STI_INPUT_ARGS -#endif /* CONFIG_PARAVIRT */ - /* * Your basic SMP spinlocks, allowing only a single CPU anywhere * diff --git a/include/asm-x86/spinlock_64.h b/include/asm-x86/spinlock_64.h index 88bf981..e56b17e 100644 --- a/include/asm-x86/spinlock_64.h +++ b/include/asm-x86/spinlock_64.h @@ -48,12 +48,12 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla "jns 5f\n" "testl $0x200, %1\n\t" /* interrupts were disabled? */ "jz 4f\n\t" - "sti\n" + STI_STRING "\n" "3:\t" "rep;nop\n\t" "cmpl $0, %0\n\t" "jle 3b\n\t" - "cli\n\t" + CLI_STRING "\n\t" "jmp 1b\n" "4:\t" "rep;nop\n\t" @@ -61,7 +61,9 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla "jg 1b\n\t" "jmp 4b\n" "5:\n\t" - : "+m" (lock->slock) : "r" ((unsigned)flags) : "memory"); + : "+m" (lock->slock) + : "r" ((unsigned)flags) CLI_STI_INPUT_ARGS + : "memory" CLI_STI_CLOBBERS); } #endif -- 1.4.4.2 |
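The reason CLI_STRING and friends can be spliced straight into the 64-bit
lock-spinning asm is plain C string-literal concatenation. The compile-only
sketch below (x86 only; the function name and loop body are made up, the
asm is trimmed to the relevant lines, and actually executing cli/sti
requires ring 0) shows how the native strings, or whatever a paravirt
backend substitutes, end up inside one asm template.

#ifndef CONFIG_PARAVIRT
#define CLI_STRING		"cli"
#define STI_STRING		"sti"
#define CLI_STI_CLOBBERS
#define CLI_STI_INPUT_ARGS
#endif

static inline void spin_relax_with_irqs(volatile unsigned int *slock,
					unsigned long flags)
{
	__asm__ __volatile__(
		STI_STRING "\n\t"	/* reopen the interrupt window while spinning */
		"1:\trep; nop\n\t"
		"cmpl $0, %0\n\t"
		"jle 1b\n\t"
		CLI_STRING "\n\t"	/* close it again before retrying the lock */
		: "+m" (*slock)
		: "r" ((unsigned)flags) CLI_STI_INPUT_ARGS
		: "memory" CLI_STI_CLOBBERS);
}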
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:20
This patch consolidates part of the tlb handling functions for the x86 architecture. In this approach, we start by the parts actually used for paravirt in i386. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/kernel/smp_64.c | 5 ++- include/asm-x86/tlbflush.h | 77 +++++++++++++++++++++++++++++++++++++++++ include/asm-x86/tlbflush_32.h | 77 ----------------------------------------- include/asm-x86/tlbflush_64.h | 43 +++-------------------- 4 files changed, 85 insertions(+), 117 deletions(-) diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index 62b0f2a..ce3935b 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -166,11 +166,12 @@ out: add_pda(irq_tlb_count, 1); } -static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, - unsigned long va) +void native_flush_tlb_others(const cpumask_t *cpumaskp, + struct mm_struct *mm, unsigned long va) { int sender; union smp_flush_state *f; + cpumask_t cpumask = *cpumaskp; /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h index 9af4cc8..93283cf 100644 --- a/include/asm-x86/tlbflush.h +++ b/include/asm-x86/tlbflush.h @@ -1,5 +1,82 @@ +#ifndef _X86_TLBFLUSH_H_ +#define _X86_TLBFLUSH_H_ + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define __flush_tlb() __native_flush_tlb() +#define __flush_tlb_global() __native_flush_tlb_global() +#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) +#endif + +static inline void __native_flush_tlb(void) +{ + write_cr3(read_cr3()); +} + +static inline void __native_flush_tlb_global(void) +{ + unsigned long cr4 = read_cr4(); + write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */ + write_cr4(cr4); /* write old PGE again and flush TLBs */ +} + +#define __native_flush_tlb_single(addr) \ + __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory") + +#ifdef CONFIG_SMP + +#include <asm/smp.h> +#include <linux/mm.h> + +#define local_flush_tlb() \ + __flush_tlb() + +extern void flush_tlb_all(void); +extern void flush_tlb_current_task(void); +extern void flush_tlb_mm(struct mm_struct *); +extern void flush_tlb_page(struct vm_area_struct *, unsigned long); + +#define flush_tlb() flush_tlb_current_task() + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + flush_tlb_mm(vma->vm_mm); +} + +void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, + unsigned long va); + +#define TLBSTATE_OK 1 +#define TLBSTATE_LAZY 2 + +#ifdef CONFIG_X86_64 +/* Roughly an IPI every 20MB with 4k pages for freeing page table + ranges. Cost is about 42k of memory for each CPU. 
*/ +#define ARCH_FREE_PTE_NR 5350 + +#else /* X86_64 */ +struct tlb_state +{ + struct mm_struct *active_mm; + int state; + char __cacheline_padding[L1_CACHE_BYTES-8]; +}; +DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); +#endif /* X86_64 */ + +#endif + +#ifndef CONFIG_PARAVIRT +#define flush_tlb_others(mask, mm, va) \ + native_flush_tlb_others(&mask, mm, va) +#endif + #ifdef CONFIG_X86_32 # include "tlbflush_32.h" #else # include "tlbflush_64.h" #endif + +#endif diff --git a/include/asm-x86/tlbflush_32.h b/include/asm-x86/tlbflush_32.h index 2bd5b95..07eaf37 100644 --- a/include/asm-x86/tlbflush_32.h +++ b/include/asm-x86/tlbflush_32.h @@ -1,49 +1,8 @@ #ifndef _I386_TLBFLUSH_H #define _I386_TLBFLUSH_H -#include <linux/mm.h> #include <asm/processor.h> -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define __flush_tlb() __native_flush_tlb() -#define __flush_tlb_global() __native_flush_tlb_global() -#define __flush_tlb_single(addr) __native_flush_tlb_single(addr) -#endif - -#define __native_flush_tlb() \ - do { \ - unsigned int tmpreg; \ - \ - __asm__ __volatile__( \ - "movl %%cr3, %0; \n" \ - "movl %0, %%cr3; # flush TLB \n" \ - : "=r" (tmpreg) \ - :: "memory"); \ - } while (0) - -/* - * Global pages have to be flushed a bit differently. Not a real - * performance problem because this does not happen often. - */ -#define __native_flush_tlb_global() \ - do { \ - unsigned int tmpreg, cr4, cr4_orig; \ - \ - __asm__ __volatile__( \ - "movl %%cr4, %2; # turn off PGE \n" \ - "movl %2, %1; \n" \ - "andl %3, %1; \n" \ - "movl %1, %%cr4; \n" \ - "movl %%cr3, %0; \n" \ - "movl %0, %%cr3; # flush TLB \n" \ - "movl %2, %%cr4; # turn PGE back on \n" \ - : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \ - : "i" (~X86_CR4_PGE) \ - : "memory"); \ - } while (0) - #define __native_flush_tlb_single(addr) \ __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory") @@ -120,44 +79,8 @@ static inline void native_flush_tlb_others(const cpumask_t *cpumask, { } -#else /* SMP */ - -#include <asm/smp.h> - -#define local_flush_tlb() \ - __flush_tlb() - -extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); -extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - -#define flush_tlb() flush_tlb_current_task() - -static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end) -{ - flush_tlb_mm(vma->vm_mm); -} - -void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, - unsigned long va); - -#define TLBSTATE_OK 1 -#define TLBSTATE_LAZY 2 - -struct tlb_state -{ - struct mm_struct *active_mm; - int state; - char __cacheline_padding[L1_CACHE_BYTES-8]; -}; -DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); #endif /* SMP */ -#ifndef CONFIG_PARAVIRT -#define flush_tlb_others(mask, mm, va) \ - native_flush_tlb_others(&mask, mm, va) -#endif static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) diff --git a/include/asm-x86/tlbflush_64.h b/include/asm-x86/tlbflush_64.h index 7731fd2..d1d1097 100644 --- a/include/asm-x86/tlbflush_64.h +++ b/include/asm-x86/tlbflush_64.h @@ -6,21 +6,8 @@ #include <asm/processor.h> #include <asm/system.h> -static inline void __flush_tlb(void) -{ - write_cr3(read_cr3()); -} - -static inline void __flush_tlb_all(void) -{ - unsigned long cr4 = read_cr4(); - write_cr4(cr4 & ~X86_CR4_PGE); /* clear PGE */ - write_cr4(cr4); /* write old PGE again and flush TLBs */ -} - -#define __flush_tlb_one(addr) \ - __asm__ 
__volatile__("invlpg (%0)" :: "r" (addr) : "memory") - +#define __flush_tlb_one(addr) __flush_tlb_single(addr) +#define __flush_tlb_all() __flush_tlb_global() /* * TLB flushing: @@ -63,32 +50,12 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, __flush_tlb(); } -#else - -#include <asm/smp.h> - -#define local_flush_tlb() \ - __flush_tlb() - -extern void flush_tlb_all(void); -extern void flush_tlb_current_task(void); -extern void flush_tlb_mm(struct mm_struct *); -extern void flush_tlb_page(struct vm_area_struct *, unsigned long); - -#define flush_tlb() flush_tlb_current_task() - -static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end) +static inline void native_flush_tlb_others(const cpumask_t *cpumask, + struct mm_struct *mm, + unsigned long va) { - flush_tlb_mm(vma->vm_mm); } -#define TLBSTATE_OK 1 -#define TLBSTATE_LAZY 2 - -/* Roughly an IPI every 20MB with 4k pages for freeing page table - ranges. Cost is about 42k of memory for each CPU. */ -#define ARCH_FREE_PTE_NR 5350 - #endif static inline void flush_tlb_kernel_range(unsigned long start, -- 1.4.4.2 |
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:55
This patch consolidates part of the pieces of smp for both architectures. (i386 and x86_64). It makes part the calls go through smp_ops, and shares code for those functions in smpcommon.c There's more room for code sharing here, but it is left as an exercise to the reader ;-) Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/kernel/Makefile_32 | 2 +- arch/x86/kernel/Makefile_64 | 2 +- arch/x86/kernel/smp_32.c | 218 ------------------------------ arch/x86/kernel/smp_64.c | 245 --------------------------------- arch/x86/kernel/smpboot_64.c | 8 +- arch/x86/kernel/smpcommon.c | 291 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/smpcommon_32.c | 81 ----------- include/asm-x86/idle.h | 18 +++- include/asm-x86/smp.h | 66 +++++++++ include/asm-x86/smp_32.h | 58 -------- include/asm-x86/smp_64.h | 4 - 11 files changed, 379 insertions(+), 614 deletions(-) diff --git a/arch/x86/kernel/Makefile_32 b/arch/x86/kernel/Makefile_32 index b08179a..b0da543 100644 --- a/arch/x86/kernel/Makefile_32 +++ b/arch/x86/kernel/Makefile_32 @@ -20,7 +20,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_PCI) += early-quirks.o obj-$(CONFIG_APM) += apm_32.o obj-$(CONFIG_X86_SMP) += smp_32.o smpboot_32.o tsc_sync.o -obj-$(CONFIG_SMP) += smpcommon_32.o +obj-$(CONFIG_SMP) += smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_32.o obj-$(CONFIG_X86_MPPARSE) += mpparse_32.o obj-$(CONFIG_X86_LOCAL_APIC) += apic_32.o nmi_32.o diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 686de84..ffee997 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -17,7 +17,7 @@ obj-y += acpi/ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CPUID) += cpuid.o -obj-$(CONFIG_SMP) += smp_64.o smpboot_64.o trampoline_64.o tsc_sync.o +obj-$(CONFIG_SMP) += smp_64.o smpboot_64.o trampoline_64.o tsc_sync.o smpcommon.o obj-y += apic_64.o nmi_64.o obj-y += io_apic_64.o mpparse_64.o genapic_64.o genapic_flat_64.o obj-$(CONFIG_KEXEC) += machine_kexec_64.o relocate_kernel_64.o crash.o diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c index fcaa026..a7cc319 100644 --- a/arch/x86/kernel/smp_32.c +++ b/arch/x86/kernel/smp_32.c @@ -464,213 +464,6 @@ void flush_tlb_all(void) on_each_cpu(do_flush_tlb_all, NULL, 1, 1); } -/* - * this function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -static void native_smp_send_reschedule(int cpu) -{ - WARN_ON(cpu_is_offline(cpu)); - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); -} - -/* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. 
- */ -static DEFINE_SPINLOCK(call_lock); - -struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -}; - -void lock_ipi_call_lock(void) -{ - spin_lock_irq(&call_lock); -} - -void unlock_ipi_call_lock(void) -{ - spin_unlock_irq(&call_lock); -} - -static struct call_data_struct *call_data; - -static void __smp_call_function(void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - struct call_data_struct data; - int cpus = num_online_cpus() - 1; - - if (!cpus) - return; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - mb(); - - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (wait) - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -} - - -/** - * smp_call_function_mask(): Run a function on a set of other CPUs. - * @mask: The set of cpus to run on. Must not include the current cpu. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - */ -static int -native_smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) -{ - struct call_data_struct data; - cpumask_t allbutself; - int cpus; - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - /* Holding any lock stops cpus from going down. */ - spin_lock(&call_lock); - - allbutself = cpu_online_map; - cpu_clear(smp_processor_id(), allbutself); - - cpus_and(mask, mask, allbutself); - cpus = cpus_weight(mask); - - if (!cpus) { - spin_unlock(&call_lock); - return 0; - } - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - mb(); - - /* Send a message to other CPUs */ - if (cpus_equal(mask, allbutself)) - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - else - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (wait) - while (atomic_read(&data.finished) != cpus) - cpu_relax(); - spin_unlock(&call_lock); - - return 0; -} - -static void stop_this_cpu (void * dummy) -{ - local_irq_disable(); - /* - * Remove this CPU: - */ - cpu_clear(smp_processor_id(), cpu_online_map); - disable_local_APIC(); - if (cpu_data(smp_processor_id()).hlt_works_ok) - for(;;) halt(); - for (;;); -} - -/* - * this function calls the 'stop' function on all other CPUs in the system. - */ - -static void native_smp_send_stop(void) -{ - /* Don't deadlock on the call lock in panic */ - int nolock = !spin_trylock(&call_lock); - unsigned long flags; - - local_irq_save(flags); - __smp_call_function(stop_this_cpu, NULL, 0, 0); - if (!nolock) - spin_unlock(&call_lock); - disable_local_APIC(); - local_irq_restore(flags); -} - -/* - * Reschedule call back. 
Nothing to do, - * all the work is done automatically when - * we return from the interrupt. - */ -fastcall void smp_reschedule_interrupt(struct pt_regs *regs) -{ - ack_APIC_irq(); - __get_cpu_var(irq_stat).irq_resched_count++; -} - -fastcall void smp_call_function_interrupt(struct pt_regs *regs) -{ - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; - - ack_APIC_irq(); - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - mb(); - atomic_inc(&call_data->started); - /* - * At this point the info structure may be out of scope unless wait==1 - */ - irq_enter(); - (*func)(info); - __get_cpu_var(irq_stat).irq_call_count++; - irq_exit(); - - if (wait) { - mb(); - atomic_inc(&call_data->finished); - } -} - static int convert_apicid_to_cpu(int apic_id) { int i; @@ -698,14 +491,3 @@ int safe_smp_processor_id(void) return cpuid >= 0 ? cpuid : 0; } -struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, - .smp_prepare_cpus = native_smp_prepare_cpus, - .cpu_up = native_cpu_up, - .smp_cpus_done = native_smp_cpus_done, - - .smp_send_stop = native_smp_send_stop, - .smp_send_reschedule = native_smp_send_reschedule, - .smp_call_function_mask = native_smp_call_function_mask, -}; -EXPORT_SYMBOL_GPL(smp_ops); diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/smp_64.c index ce3935b..c19c68b 100644 --- a/arch/x86/kernel/smp_64.c +++ b/arch/x86/kernel/smp_64.c @@ -283,248 +283,3 @@ void flush_tlb_all(void) { on_each_cpu(do_flush_tlb_all, NULL, 1, 1); } - -/* - * this function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ - -void smp_send_reschedule(int cpu) -{ - send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); -} - -/* - * Structure and data for smp_call_function(). This is designed to minimise - * static memory requirements. It also looks cleaner. - */ -static DEFINE_SPINLOCK(call_lock); - -struct call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -}; - -static struct call_data_struct * call_data; - -void lock_ipi_call_lock(void) -{ - spin_lock_irq(&call_lock); -} - -void unlock_ipi_call_lock(void) -{ - spin_unlock_irq(&call_lock); -} - -/* - * this function sends a 'generic call function' IPI to all other CPU - * of the system defined in the mask. - */ -static int __smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) -{ - struct call_data_struct data; - cpumask_t allbutself; - int cpus; - - allbutself = cpu_online_map; - cpu_clear(smp_processor_id(), allbutself); - - cpus_and(mask, mask, allbutself); - cpus = cpus_weight(mask); - - if (!cpus) - return 0; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - wmb(); - - /* Send a message to other CPUs */ - if (cpus_equal(mask, allbutself)) - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - else - send_IPI_mask(mask, CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (!wait) - return 0; - - while (atomic_read(&data.finished) != cpus) - cpu_relax(); - - return 0; -} -/** - * smp_call_function_mask(): Run a function on a set of other CPUs. - * @mask: The set of cpus to run on. Must not include the current cpu. 
- * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - */ -int smp_call_function_mask(cpumask_t mask, - void (*func)(void *), void *info, - int wait) -{ - int ret; - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - spin_lock(&call_lock); - ret = __smp_call_function_mask(mask, func, info, wait); - spin_unlock(&call_lock); - return ret; -} -EXPORT_SYMBOL(smp_call_function_mask); - -/* - * smp_call_function_single - Run a function on a specific CPU - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: Currently unused. - * @wait: If true, wait until function has completed on other CPUs. - * - * Retrurns 0 on success, else a negative status code. - * - * Does not return until the remote CPU is nearly ready to execute <func> - * or is or has executed. - */ - -int smp_call_function_single (int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - /* prevent preemption and reschedule on another processor */ - int ret, me = get_cpu(); - - /* Can deadlock when called with interrupts disabled */ - WARN_ON(irqs_disabled()); - - if (cpu == me) { - local_irq_disable(); - func(info); - local_irq_enable(); - put_cpu(); - return 0; - } - - ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); - - put_cpu(); - return ret; -} -EXPORT_SYMBOL(smp_call_function_single); - -/* - * smp_call_function - run a function on all other CPUs. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: currently unused. - * @wait: If true, wait (atomically) until function has completed on other - * CPUs. - * - * Returns 0 on success, else a negative status code. Does not return until - * remote CPUs are nearly ready to execute func or are or have executed. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - * Actually there are a few legal cases, like panic. - */ -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, - int wait) -{ - return smp_call_function_mask(cpu_online_map, func, info, wait); -} -EXPORT_SYMBOL(smp_call_function); - -static void stop_this_cpu(void *dummy) -{ - local_irq_disable(); - /* - * Remove this CPU: - */ - cpu_clear(smp_processor_id(), cpu_online_map); - disable_local_APIC(); - for (;;) - halt(); -} - -void smp_send_stop(void) -{ - int nolock; - unsigned long flags; - - if (reboot_force) - return; - - /* Don't deadlock on the call lock in panic */ - nolock = !spin_trylock(&call_lock); - local_irq_save(flags); - __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); - if (!nolock) - spin_unlock(&call_lock); - disable_local_APIC(); - local_irq_restore(flags); -} - -/* - * Reschedule call back. Nothing to do, - * all the work is done automatically when - * we return from the interrupt. 
- */ -asmlinkage void smp_reschedule_interrupt(void) -{ - ack_APIC_irq(); - add_pda(irq_resched_count, 1); -} - -asmlinkage void smp_call_function_interrupt(void) -{ - void (*func) (void *info) = call_data->func; - void *info = call_data->info; - int wait = call_data->wait; - - ack_APIC_irq(); - /* - * Notify initiating CPU that I've grabbed the data and am - * about to execute the function - */ - mb(); - atomic_inc(&call_data->started); - /* - * At this point the info structure may be out of scope unless wait==1 - */ - exit_idle(); - irq_enter(); - (*func)(info); - add_pda(irq_call_count, 1); - irq_exit(); - if (wait) { - mb(); - atomic_inc(&call_data->finished); - } -} - diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 6f3e63c..17e54fa 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -864,7 +864,7 @@ void __init smp_set_apicids(void) * Prepare for SMP bootup. The MP table or ACPI has been read * earlier. Just do some sanity checking here and enable APIC mode. */ -void __init smp_prepare_cpus(unsigned int max_cpus) +void __init native_smp_prepare_cpus(unsigned int max_cpus) { nmi_watchdog_default(); current_cpu_data = boot_cpu_data; @@ -908,7 +908,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* * Early setup to make printk work. */ -void __init smp_prepare_boot_cpu(void) +void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); cpu_set(me, cpu_online_map); @@ -919,7 +919,7 @@ void __init smp_prepare_boot_cpu(void) /* * Entry point to boot a CPU. */ -int __cpuinit __cpu_up(unsigned int cpu) +int __cpuinit native_cpu_up(unsigned int cpu) { int apicid = cpu_present_to_apicid(cpu); unsigned long flags; @@ -977,7 +977,7 @@ int __cpuinit __cpu_up(unsigned int cpu) /* * Finish the SMP boot. */ -void __init smp_cpus_done(unsigned int max_cpus) +void __init native_smp_cpus_done(unsigned int max_cpus) { smp_cleanup_boot(); setup_ioapic_dest(); diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c new file mode 100644 index 0000000..e3bee40 --- /dev/null +++ b/arch/x86/kernel/smpcommon.c @@ -0,0 +1,291 @@ +/* + * SMP stuff which is common to all sub-architectures. + */ +#include <linux/cpu.h> +#include <linux/module.h> +#include <linux/interrupt.h> + +#include <asm/mach_apic.h> +#include <asm/proto.h> +#include <asm/apicdef.h> +#include <asm/smp.h> +#include <asm/idle.h> + +#ifdef CONFIG_X86_32 +DEFINE_PER_CPU(unsigned long, this_cpu_off); +EXPORT_PER_CPU_SYMBOL(this_cpu_off); + +/* Initialize the CPU's GDT. This is either the boot CPU doing itself + (still using the master per-cpu area), or a CPU doing it for a + secondary which will soon come up. */ +__cpuinit void init_gdt(int cpu) +{ + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + + pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a, + (u32 *)&gdt[GDT_ENTRY_PERCPU].b, + __per_cpu_offset[cpu], 0xFFFFF, + 0x80 | DESCTYPE_S | 0x2, 0x8); + + per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; + per_cpu(cpu_number, cpu) = cpu; +} +#endif + +/* + * Structure and data for smp_call_function(). This is designed to minimise + * static memory requirements. It also looks cleaner. 
+ */ +static DEFINE_SPINLOCK(call_lock); + +struct call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +}; + +static struct call_data_struct * call_data; + +void lock_ipi_call_lock(void) +{ + spin_lock_irq(&call_lock); +} + +void unlock_ipi_call_lock(void) +{ + spin_unlock_irq(&call_lock); +} + +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on. Must not include the current cpu. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +static int +native_smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + struct call_data_struct data; + cpumask_t allbutself; + int cpus; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + /* Holding any lock stops cpus from going down. */ + spin_lock(&call_lock); + + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + + if (!cpus) { + spin_unlock(&call_lock); + return 0; + } + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + mb(); + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (wait) + while (atomic_read(&data.finished) != cpus) + cpu_relax(); + spin_unlock(&call_lock); + + return 0; +} + +/** + * smp_call_function(): Run a function on all other CPUs. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: Unused. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function(void (*func) (void *info), void *info, int nonatomic, + int wait) +{ + return smp_call_function_mask(cpu_online_map, func, info, wait); +} +EXPORT_SYMBOL(smp_call_function); + +/** + * smp_call_function_single - Run a function on a specific CPU + * @cpu: The target CPU. Cannot be the calling CPU. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: Unused. + * @wait: If true, wait until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. 
+ */ +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + /* prevent preemption and reschedule on another processor */ + int ret; + int me = get_cpu(); + if (cpu == me) { + local_irq_disable(); + func(info); + local_irq_enable(); + put_cpu(); + return 0; + } + + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); + + put_cpu(); + return ret; +} +EXPORT_SYMBOL(smp_call_function_single); + +static void stop_this_cpu(void *dummy) +{ + local_irq_disable(); + /* + * Remove this CPU: + */ + cpu_clear(smp_processor_id(), cpu_online_map); + disable_local_APIC(); +#ifdef CONFIG_X86_32 + if (cpu_data(smp_processor_id()).hlt_works_ok) +#endif + for (;;) + halt(); + for (;;); +} + +void native_smp_send_stop(void) +{ + int nolock; + unsigned long flags; + +#ifdef CONFIG_X86_64 + if (reboot_force) + return; +#endif + + /* Don't deadlock on the call lock in panic */ + nolock = !spin_trylock(&call_lock); + local_irq_save(flags); + smp_call_function(stop_this_cpu, NULL, 0, 0); + if (!nolock) + spin_unlock(&call_lock); + disable_local_APIC(); + local_irq_restore(flags); +} + +asmlinkage void smp_call_function_interrupt(void) +{ + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + + ack_APIC_irq(); + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + atomic_inc(&call_data->started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + exit_idle(); + irq_enter(); + (*func)(info); + /* FIXME: Put this thing in the same place for both arches */ +#ifdef CONFIG_X86_64 + add_pda(irq_call_count, 1); +#else + __get_cpu_var(irq_stat).irq_call_count++; +#endif + irq_exit(); + if (wait) { + mb(); + atomic_inc(&call_data->finished); + } +} + +/* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +static void native_smp_send_reschedule(int cpu) +{ + WARN_ON(cpu_is_offline(cpu)); + send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); +} + +/* + * Reschedule call back. Nothing to do, + * all the work is done automatically when + * we return from the interrupt. + */ +asmlinkage void smp_reschedule_interrupt(struct pt_regs *regs) +{ + ack_APIC_irq(); + /* FIXME: Put this thing in the same place for both arches */ +#ifdef CONFIG_X86_64 + add_pda(irq_call_count, 1); +#else + __get_cpu_var(irq_stat).irq_resched_count++; +#endif +} + +struct smp_ops smp_ops = { + .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, + .smp_prepare_cpus = native_smp_prepare_cpus, + .cpu_up = native_cpu_up, + .smp_cpus_done = native_smp_cpus_done, + + .smp_send_stop = native_smp_send_stop, + .smp_send_reschedule = native_smp_send_reschedule, + .smp_call_function_mask = native_smp_call_function_mask, +}; +EXPORT_SYMBOL(smp_ops); diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c deleted file mode 100644 index bbfe85a..0000000 --- a/arch/x86/kernel/smpcommon_32.c +++ /dev/null @@ -1,81 +0,0 @@ -/* - * SMP stuff which is common to all sub-architectures. - */ -#include <linux/module.h> -#include <asm/smp.h> - -DEFINE_PER_CPU(unsigned long, this_cpu_off); -EXPORT_PER_CPU_SYMBOL(this_cpu_off); - -/* Initialize the CPU's GDT. This is either the boot CPU doing itself - (still using the master per-cpu area), or a CPU doing it for a - secondary which will soon come up. 
*/ -__cpuinit void init_gdt(int cpu) -{ - struct desc_struct *gdt = get_cpu_gdt_table(cpu); - - pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a, - (u32 *)&gdt[GDT_ENTRY_PERCPU].b, - __per_cpu_offset[cpu], 0xFFFFF, - 0x80 | DESCTYPE_S | 0x2, 0x8); - - per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; - per_cpu(cpu_number, cpu) = cpu; -} - - -/** - * smp_call_function(): Run a function on all other CPUs. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: Unused. - * @wait: If true, wait (atomically) until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - * - * You must not call this function with disabled interrupts or from a - * hardware interrupt handler or from a bottom half handler. - */ -int smp_call_function(void (*func) (void *info), void *info, int nonatomic, - int wait) -{ - return smp_call_function_mask(cpu_online_map, func, info, wait); -} -EXPORT_SYMBOL(smp_call_function); - -/** - * smp_call_function_single - Run a function on a specific CPU - * @cpu: The target CPU. Cannot be the calling CPU. - * @func: The function to run. This must be fast and non-blocking. - * @info: An arbitrary pointer to pass to the function. - * @nonatomic: Unused. - * @wait: If true, wait until function has completed on other CPUs. - * - * Returns 0 on success, else a negative status code. - * - * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. - */ -int smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - /* prevent preemption and reschedule on another processor */ - int ret; - int me = get_cpu(); - if (cpu == me) { - local_irq_disable(); - func(info); - local_irq_enable(); - put_cpu(); - return 0; - } - - ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); - - put_cpu(); - return ret; -} -EXPORT_SYMBOL(smp_call_function_single); diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h index d240e5b..487bca1 100644 --- a/include/asm-x86/idle.h +++ b/include/asm-x86/idle.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_IDLE_H -#define _ASM_X86_64_IDLE_H 1 +#ifndef _ASM_X86_IDLE_H +#define _ASM_X86_IDLE_H 1 #define IDLE_START 1 #define IDLE_END 2 @@ -7,7 +7,21 @@ struct notifier_block; void idle_notifier_register(struct notifier_block *n); +#ifdef CONFIG_X86_64 void enter_idle(void); void exit_idle(void); +#else +/* + * i386 does not really uses it, but define them here so we can avoid ifdefs in + * shared locations that uses it + */ +static inline void enter_idle(void) +{ +} +static inline void exit_idle(void) +{ +} +#endif + #endif diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index f2e8319..b2f99df 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -1,5 +1,71 @@ +#ifndef _X86_SMP_H_ +#define _X86_SMP_H_ + +#ifndef __ASSEMBLY__ +struct smp_ops +{ + void (*smp_prepare_boot_cpu)(void); + void (*smp_prepare_cpus)(unsigned max_cpus); + int (*cpu_up)(unsigned cpu); + void (*smp_cpus_done)(unsigned max_cpus); + + void (*smp_send_stop)(void); + void (*smp_send_reschedule)(int cpu); + int (*smp_call_function_mask)(cpumask_t mask, + void (*func)(void *info), void *info, + int wait); +}; + +extern struct smp_ops smp_ops; + +static inline void smp_prepare_boot_cpu(void) +{ + 
smp_ops.smp_prepare_boot_cpu(); +} +static inline void smp_prepare_cpus(unsigned int max_cpus) +{ + smp_ops.smp_prepare_cpus(max_cpus); +} +static inline int __cpu_up(unsigned int cpu) +{ + return smp_ops.cpu_up(cpu); +} +static inline void smp_cpus_done(unsigned int max_cpus) +{ + smp_ops.smp_cpus_done(max_cpus); +} + +static inline void smp_send_stop(void) +{ + smp_ops.smp_send_stop(); +} +static inline void smp_send_reschedule(int cpu) +{ + smp_ops.smp_send_reschedule(cpu); +} + +static inline int smp_call_function_mask(cpumask_t mask, + void (*func) (void *info), + void *info, int wait) +{ + return smp_ops.smp_call_function_mask(mask, func, info, wait); +} + +void native_smp_prepare_boot_cpu(void); +void native_smp_prepare_cpus(unsigned int max_cpus); +int native_cpu_up(unsigned int cpunum); +void native_smp_cpus_done(unsigned int max_cpus); + +#ifndef CONFIG_PARAVIRT +#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ +do { } while (0) +#endif +#endif /* __ASSEMBLY__ */ + #ifdef CONFIG_X86_32 # include "smp_32.h" #else # include "smp_64.h" #endif + +#endif diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h index e10b7af..a53b03f 100644 --- a/include/asm-x86/smp_32.h +++ b/include/asm-x86/smp_32.h @@ -53,64 +53,6 @@ extern void cpu_uninit(void); extern void remove_siblinginfo(int cpu); #endif -struct smp_ops -{ - void (*smp_prepare_boot_cpu)(void); - void (*smp_prepare_cpus)(unsigned max_cpus); - int (*cpu_up)(unsigned cpu); - void (*smp_cpus_done)(unsigned max_cpus); - - void (*smp_send_stop)(void); - void (*smp_send_reschedule)(int cpu); - int (*smp_call_function_mask)(cpumask_t mask, - void (*func)(void *info), void *info, - int wait); -}; - -extern struct smp_ops smp_ops; - -static inline void smp_prepare_boot_cpu(void) -{ - smp_ops.smp_prepare_boot_cpu(); -} -static inline void smp_prepare_cpus(unsigned int max_cpus) -{ - smp_ops.smp_prepare_cpus(max_cpus); -} -static inline int __cpu_up(unsigned int cpu) -{ - return smp_ops.cpu_up(cpu); -} -static inline void smp_cpus_done(unsigned int max_cpus) -{ - smp_ops.smp_cpus_done(max_cpus); -} - -static inline void smp_send_stop(void) -{ - smp_ops.smp_send_stop(); -} -static inline void smp_send_reschedule(int cpu) -{ - smp_ops.smp_send_reschedule(cpu); -} -static inline int smp_call_function_mask(cpumask_t mask, - void (*func) (void *info), void *info, - int wait) -{ - return smp_ops.smp_call_function_mask(mask, func, info, wait); -} - -void native_smp_prepare_boot_cpu(void); -void native_smp_prepare_cpus(unsigned int max_cpus); -int native_cpu_up(unsigned int cpunum); -void native_smp_cpus_done(unsigned int max_cpus); - -#ifndef CONFIG_PARAVIRT -#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ -do { } while (0) -#endif - /* * This function is needed by all SMP systems. It must _always_ be valid * from the initial startup. 
We map APIC_BASE very early in page_setup(), diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h index ab612b0..279ff92 100644 --- a/include/asm-x86/smp_64.h +++ b/include/asm-x86/smp_64.h @@ -36,9 +36,6 @@ extern volatile unsigned long smp_invalidate_needed; extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; -extern void smp_send_reschedule(int cpu); -extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *), - void *info, int wait); /* * cpu_sibling_map and cpu_core_map now live @@ -127,4 +124,3 @@ extern unsigned int boot_cpu_id; #define cpu_physical_id(cpu) boot_cpu_id #endif /* !CONFIG_SMP */ #endif - -- 1.4.4.2 |
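As a minimal sketch (not part of the series; the my_guest_* names are invented), a paravirt guest can take over a single SMP operation by rewriting the corresponding smp_ops pointer at boot and leaving the native entries in place for everything else:

#include <asm/smp.h>

static void my_guest_smp_send_reschedule(int cpu)
{
        /* e.g. ask the hypervisor to kick the target vcpu
         * instead of sending a RESCHEDULE_VECTOR IPI */
}

static void __init my_guest_smp_init(void)
{
        smp_ops.smp_send_reschedule = my_guest_smp_send_reschedule;
}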
From: Glauber de O. C. <gc...@re...> - 2007-11-09 22:06:05
|
This patch adds native hooks for debugreg handling functions, and for the native load_rsp0 function. The later also have its call sites patched. There's some room for consolidation in the processor*.h headers, and it is done, for paravirt related functions Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/smpboot_64.c | 2 +- include/asm-x86/msr.h | 67 ------------------ include/asm-x86/processor.h | 146 ++++++++++++++++++++++++++++++++++++++++ include/asm-x86/processor_32.h | 138 +------------------------------------- include/asm-x86/processor_64.h | 32 ++++----- 6 files changed, 165 insertions(+), 222 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6472f37..9647a10 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -589,7 +589,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Reload esp0, LDT and the page table pointer: */ - tss->rsp0 = next->rsp0; + load_esp0(tss, next); /* * Switch DS and ES. diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c index 17e54fa..8fb0f90 100644 --- a/arch/x86/kernel/smpboot_64.c +++ b/arch/x86/kernel/smpboot_64.c @@ -613,7 +613,7 @@ do_rest: start_rip = setup_trampoline(); init_rsp = c_idle.idle->thread.rsp; - per_cpu(init_tss,cpu).rsp0 = init_rsp; + load_esp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); initial_code = start_secondary; clear_tsk_thread_flag(c_idle.idle, TIF_FORK); diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h index ba4b314..48f73c7 100644 --- a/include/asm-x86/msr.h +++ b/include/asm-x86/msr.h @@ -253,73 +253,6 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) : "=a" (low), "=d" (high) \ : "c" (counter)) -static inline void cpuid(int op, unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op)); -} - -/* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, - int *edx) -{ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (op), "c" (count)); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax; - - __asm__("cpuid" - : "=a" (eax) - : "0" (op) - : "bx", "cx", "dx"); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx; - - __asm__("cpuid" - : "=a" (eax), "=b" (ebx) - : "0" (op) - : "cx", "dx" ); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ecx; - - __asm__("cpuid" - : "=a" (eax), "=c" (ecx) - : "0" (op) - : "bx", "dx" ); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) -{ - unsigned int eax, edx; - - __asm__("cpuid" - : "=a" (eax), "=d" (edx) - : "0" (op) - : "bx", "cx"); - return edx; -} - #ifdef CONFIG_SMP void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 46e1c04..a576a72 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -1,5 +1,151 @@ +#ifndef _X86_PROCESSOR_H_ +#define _X86_PROCESSOR_H_ + +#include <linux/kernel.h> 
+#include <asm/bug.h> + +static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +static inline unsigned long native_get_debugreg(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! */ + + switch (regno) { + case 0: + asm volatile ("mov %%db0, %0" :"=r" (val)); break; + case 1: + asm volatile ("mov %%db1, %0" :"=r" (val)); break; + case 2: + asm volatile ("mov %%db2, %0" :"=r" (val)); break; + case 3: + asm volatile ("mov %%db3, %0" :"=r" (val)); break; + case 6: + asm volatile ("mov %%db6, %0" :"=r" (val)); break; + case 7: + asm volatile ("mov %%db7, %0" :"=r" (val)); break; + default: + WARN_ON(1); + } + return val; +} + +static inline void native_set_debugreg(int regno, unsigned long value) +{ + switch (regno) { + case 0: + asm("mov %0,%%db0" : :"r" (value) : "memory"); + break; + case 1: + asm("mov %0,%%db1" : :"r" (value) : "memory"); + break; + case 2: + asm("mov %0,%%db2" : :"r" (value) : "memory"); + break; + case 3: + asm("mov %0,%%db3" : :"r" (value) : "memory"); + break; + case 6: + asm("mov %0,%%db6" : :"r" (value) : "memory"); + break; + case 7: + asm("mov %0,%%db7" : :"r" (value) : "memory"); + break; + default: + WARN_ON(1); + } +} + #ifdef CONFIG_X86_32 # include "processor_32.h" #else # include "processor_64.h" #endif + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define paravirt_enabled() 0 +#define __cpuid native_cpuid +#define SWAPGS swapgs + +static inline void load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + native_load_esp0(tss, thread); +} + +/* + * These special macros can be used to get or set a debugging register + */ +#define get_debugreg(var, register) \ + (var) = native_get_debugreg(register) +#define set_debugreg(value, register) \ + native_set_debugreg(register, value) + +#define set_iopl_mask native_set_iopl_mask +#endif /* CONFIG_PARAVIRT */ + +/* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. 
+ */ +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + *eax = op; + *ecx = 0; + __cpuid(eax, ebx, ecx, edx); +} + +/* Some CPUID calls want 'count' to be placed in ecx */ +static inline void cpuid_count(int op, int count, unsigned *eax, + unsigned *ebx, unsigned *ecx, unsigned *edx) +{ + *eax = op; + *ecx = count; + __cpuid(eax, ebx, ecx, edx); +} + +/* + * CPUID functions returning a single datum + */ +static inline unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return eax; +} +static inline unsigned int cpuid_ebx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return ebx; +} +static inline unsigned int cpuid_ecx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return ecx; +} +static inline unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid(op, &eax, &ebx, &ecx, &edx); + return edx; +} +#endif diff --git a/include/asm-x86/processor_32.h b/include/asm-x86/processor_32.h index 3b51a18..233fee3 100644 --- a/include/asm-x86/processor_32.h +++ b/include/asm-x86/processor_32.h @@ -132,17 +132,6 @@ extern void detect_ht(struct cpuinfo_x86 *c); static inline void detect_ht(struct cpuinfo_x86 *c) {} #endif -static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - __asm__("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} #define load_cr3(pgdir) write_cr3(__pa(pgdir)) @@ -505,56 +494,6 @@ static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct } } - -static inline unsigned long native_get_debugreg(int regno) -{ - unsigned long val = 0; /* Damn you, gcc! 
*/ - - switch (regno) { - case 0: - asm("movl %%db0, %0" :"=r" (val)); break; - case 1: - asm("movl %%db1, %0" :"=r" (val)); break; - case 2: - asm("movl %%db2, %0" :"=r" (val)); break; - case 3: - asm("movl %%db3, %0" :"=r" (val)); break; - case 6: - asm("movl %%db6, %0" :"=r" (val)); break; - case 7: - asm("movl %%db7, %0" :"=r" (val)); break; - default: - BUG(); - } - return val; -} - -static inline void native_set_debugreg(int regno, unsigned long value) -{ - switch (regno) { - case 0: - asm("movl %0,%%db0" : /* no output */ :"r" (value)); - break; - case 1: - asm("movl %0,%%db1" : /* no output */ :"r" (value)); - break; - case 2: - asm("movl %0,%%db2" : /* no output */ :"r" (value)); - break; - case 3: - asm("movl %0,%%db3" : /* no output */ :"r" (value)); - break; - case 6: - asm("movl %0,%%db6" : /* no output */ :"r" (value)); - break; - case 7: - asm("movl %0,%%db7" : /* no output */ :"r" (value)); - break; - default: - BUG(); - } -} - /* * Set IOPL bits in EFLAGS from given mask */ @@ -571,82 +510,9 @@ static inline void native_set_iopl_mask(unsigned mask) : "i" (~X86_EFLAGS_IOPL), "r" (mask)); } -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define paravirt_enabled() 0 -#define __cpuid native_cpuid - -static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread) -{ - native_load_esp0(tss, thread); -} - -/* - * These special macros can be used to get or set a debugging register - */ -#define get_debugreg(var, register) \ - (var) = native_get_debugreg(register) -#define set_debugreg(value, register) \ - native_set_debugreg(register, value) - -#define set_iopl_mask native_set_iopl_mask -#endif /* CONFIG_PARAVIRT */ - -/* - * Generic CPUID function - * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx - * resulting in stale register contents being returned. 
- */ -static inline void cpuid(unsigned int op, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = 0; - __cpuid(eax, ebx, ecx, edx); -} - -/* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(unsigned int op, int count, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = count; - __cpuid(eax, ebx, ecx, edx); -} - -/* - * CPUID functions returning a single datum - */ -static inline unsigned int cpuid_eax(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return eax; -} -static inline unsigned int cpuid_ebx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return ebx; -} -static inline unsigned int cpuid_ecx(unsigned int op) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return ecx; -} -static inline unsigned int cpuid_edx(unsigned int op) +/* We don't really have one */ +static inline void native_swapgs(void) { - unsigned int eax, ebx, ecx, edx; - - cpuid(op, &eax, &ebx, &ecx, &edx); - return edx; } /* generic versions from gas */ diff --git a/include/asm-x86/processor_64.h b/include/asm-x86/processor_64.h index 6c0d96a..befecc6 100644 --- a/include/asm-x86/processor_64.h +++ b/include/asm-x86/processor_64.h @@ -114,21 +114,13 @@ extern unsigned long mmu_cr4_features; static inline void set_in_cr4 (unsigned long mask) { mmu_cr4_features |= mask; - __asm__("movq %%cr4,%%rax\n\t" - "orq %0,%%rax\n\t" - "movq %%rax,%%cr4\n" - : : "irg" (mask) - :"ax"); + write_cr4(read_cr4() | mask); } static inline void clear_in_cr4 (unsigned long mask) { mmu_cr4_features &= ~mask; - __asm__("movq %%cr4,%%rax\n\t" - "andq %0,%%rax\n\t" - "movq %%rax,%%cr4\n" - : : "irg" (~mask) - :"ax"); + write_cr4(read_cr4() & ~mask); } @@ -249,6 +241,12 @@ struct thread_struct { .rsp0 = (unsigned long)&init_stack + sizeof(init_stack) \ } +static inline void native_load_esp0(struct tss_struct *tss, + struct thread_struct *thread) +{ + tss->rsp0 = thread->rsp0; +} + #define INIT_MMAP \ { &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL } @@ -264,13 +262,13 @@ struct thread_struct { set_fs(USER_DS); \ } while(0) -#define get_debugreg(var, register) \ - __asm__("movq %%db" #register ", %0" \ - :"=r" (var)) -#define set_debugreg(value, register) \ - __asm__("movq %0,%%db" #register \ - : /* no output */ \ - :"r" (value)) +extern void native_swapgs(void); + +/* We have it for simmetry with i386 code, but we don't really use it */ +static inline void native_set_iopl_mask(unsigned mask) +{ + +} struct task_struct; struct mm_struct; -- 1.4.4.2 |
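A usage sketch for the debug register accessors consolidated above (example_clear_hw_breakpoints is an invented name, not code from the series); get_debugreg()/set_debugreg() expand to the native_ helpers here, or to the paravirt hooks under CONFIG_PARAVIRT:

#include <asm/processor.h>

static void example_clear_hw_breakpoints(void)
{
        unsigned long dr7;

        get_debugreg(dr7, 7);                   /* read %db7 */
        set_debugreg(dr7 & ~0xffUL, 7);         /* clear the L0-3/G0-3 enable bits */
}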
From: Jeremy F. <je...@go...> - 2007-11-10 00:30:14
|
Glauber de Oliveira Costa wrote: > This patch consolidates part of the pieces of smp for both architectures. > (i386 and x86_64). It makes part the calls go through smp_ops, and shares > code for those functions in smpcommon.c > > There's more room for code sharing here, but it is left as an exercise to > the reader ;-) > I'm getting link errors in 32-bit: arch/x86/kernel/built-in.o: In function `native_smp_send_reschedule': /home/jeremy/hg/xen/paravirt/linux/arch/x86/kernel/smpcommon.c:262: undefined reference to `genapic' arch/x86/kernel/built-in.o: In function `native_smp_call_function_mask': /home/jeremy/hg/xen/paravirt/linux/arch/x86/kernel/smpcommon.c:113: undefined reference to `genapic' J |
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:25
|
Those are the hooks needed for paravirt at entry_64.S In general, they follow the way of i386. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/kernel/entry_64.S | 108 +++++++++++++++++++++++++++----------------- 1 files changed, 66 insertions(+), 42 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3a058bb..b6d7008 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -50,6 +50,7 @@ #include <asm/hw_irq.h> #include <asm/page.h> #include <asm/irqflags.h> +#include <asm/paravirt.h> .code64 @@ -57,6 +58,20 @@ #define retint_kernel retint_restore_args #endif +#ifdef CONFIG_PARAVIRT +ENTRY(native_irq_enable_syscall_ret) + movq %gs:pda_oldrsp,%rsp + swapgs + sysretq +/* + * This could well be defined as a C function, but as it is only used here, + * let it be locally defined + */ +ENTRY(native_swapgs) + swapgs + retq +#endif /* CONFIG_PARAVIRT */ + .macro TRACE_IRQS_IRETQ offset=ARGOFFSET #ifdef CONFIG_TRACE_IRQFLAGS @@ -216,14 +231,21 @@ ENTRY(system_call) CFI_DEF_CFA rsp,PDA_STACKOFFSET CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ - swapgs + SWAPGS_UNSAFE_STACK + /* + * A hypervisor implementation might want to use a label + * after the swapgs, so that it can do the swapgs + * for the guest and jump here on syscall. + */ +ENTRY(system_call_after_swapgs) + movq %rsp,%gs:pda_oldrsp movq %gs:pda_kernelstack,%rsp /* * No need to follow this irqs off/on section - it's straight * and short: */ - sti + ENABLE_INTERRUPTS(CLBR_NONE) SAVE_ARGS 8,1 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) @@ -246,7 +268,7 @@ ret_from_sys_call: sysret_check: LOCKDEP_SYS_EXIT GET_THREAD_INFO(%rcx) - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF movl threadinfo_flags(%rcx),%edx andl %edi,%edx @@ -260,9 +282,7 @@ sysret_check: CFI_REGISTER rip,rcx RESTORE_ARGS 0,-ARG_SKIP,1 /*CFI_REGISTER rflags,r11*/ - movq %gs:pda_oldrsp,%rsp - swapgs - sysretq + ENABLE_INTERRUPTS_SYSCALL_RET CFI_RESTORE_STATE /* Handle reschedules */ @@ -271,7 +291,7 @@ sysret_careful: bt $TIF_NEED_RESCHED,%edx jnc sysret_signal TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) pushq %rdi CFI_ADJUST_CFA_OFFSET 8 call schedule @@ -282,7 +302,7 @@ sysret_careful: /* Handle a signal */ sysret_signal: TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx jz 1f @@ -295,7 +315,7 @@ sysret_signal: 1: movl $_TIF_NEED_RESCHED,%edi /* Use IRET because user could have changed frame. This works because ptregscall_common has called FIXUP_TOP_OF_STACK. 
*/ - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF jmp int_with_check @@ -327,7 +347,7 @@ tracesys: */ .globl int_ret_from_sys_call int_ret_from_sys_call: - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl $3,CS-ARGOFFSET(%rsp) je retint_restore_args @@ -349,20 +369,20 @@ int_careful: bt $TIF_NEED_RESCHED,%edx jnc int_very_careful TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) pushq %rdi CFI_ADJUST_CFA_OFFSET 8 call schedule popq %rdi CFI_ADJUST_CFA_OFFSET -8 - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF jmp int_with_check /* handle signals and tracing -- both require a full stack frame */ int_very_careful: TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) SAVE_REST /* Check for syscall exit trace */ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx @@ -385,7 +405,7 @@ int_signal: 1: movl $_TIF_NEED_RESCHED,%edi int_restore_rest: RESTORE_REST - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF jmp int_with_check CFI_ENDPROC @@ -506,7 +526,7 @@ END(stub_rt_sigreturn) CFI_DEF_CFA_REGISTER rbp testl $3,CS(%rdi) je 1f - swapgs + SWAPGS /* irqcount is used to check if a CPU is already on an interrupt stack or not. While this is essentially redundant with preempt_count it is a little cheaper to use a separate counter in the PDA @@ -527,7 +547,7 @@ ENTRY(common_interrupt) interrupt do_IRQ /* 0(%rsp): oldrsp-ARGOFFSET */ ret_from_intr: - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF decl %gs:pda_irqcount leaveq @@ -556,13 +576,13 @@ retint_swapgs: /* return to user-space */ /* * The iretq could re-enable interrupts: */ - cli + DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_IRETQ - swapgs + SWAPGS jmp restore_args retint_restore_args: /* return to kernel space */ - cli + DISABLE_INTERRUPTS(CLBR_ANY) /* * The iretq could re-enable interrupts: */ @@ -570,10 +590,14 @@ retint_restore_args: /* return to kernel space */ restore_args: RESTORE_ARGS 0,8,0 iret_label: +#ifdef CONFIG_PARAVIRT + INTERRUPT_RETURN +#endif +ENTRY(native_iret) iretq .section __ex_table,"a" - .quad iret_label,bad_iret + .quad native_iret, bad_iret .previous .section .fixup,"ax" /* force a signal here? 
this matches i386 behaviour */ @@ -581,24 +605,24 @@ iret_label: bad_iret: movq $11,%rdi /* SIGSEGV */ TRACE_IRQS_ON - sti - jmp do_exit - .previous - + ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) + jmp do_exit + .previous + /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE bt $TIF_NEED_RESCHED,%edx jnc retint_signal TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) pushq %rdi CFI_ADJUST_CFA_OFFSET 8 call schedule popq %rdi CFI_ADJUST_CFA_OFFSET -8 GET_THREAD_INFO(%rcx) - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF jmp retint_check @@ -606,14 +630,14 @@ retint_signal: testl $(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx jz retint_swapgs TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) SAVE_REST movq $-1,ORIG_RAX(%rsp) xorl %esi,%esi # oldset movq %rsp,%rdi # &pt_regs call do_notify_resume RESTORE_REST - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF movl $_TIF_NEED_RESCHED,%edi GET_THREAD_INFO(%rcx) @@ -731,7 +755,7 @@ END(spurious_interrupt) rdmsr testl %edx,%edx js 1f - swapgs + SWAPGS xorl %ebx,%ebx 1: .if \ist @@ -747,7 +771,7 @@ END(spurious_interrupt) .if \ist addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) .endif - cli + DISABLE_INTERRUPTS(CLBR_NONE) .if \irqtrace TRACE_IRQS_OFF .endif @@ -776,10 +800,10 @@ paranoid_swapgs\trace: .if \trace TRACE_IRQS_IRETQ 0 .endif - swapgs + SWAPGS_UNSAFE_STACK paranoid_restore\trace: RESTORE_ALL 8 - iretq + INTERRUPT_RETURN paranoid_userspace\trace: GET_THREAD_INFO(%rcx) movl threadinfo_flags(%rcx),%ebx @@ -794,11 +818,11 @@ paranoid_userspace\trace: .if \trace TRACE_IRQS_ON .endif - sti + ENABLE_INTERRUPTS(CLBR_NONE) xorl %esi,%esi /* arg2: oldset */ movq %rsp,%rdi /* arg1: &pt_regs */ call do_notify_resume - cli + DISABLE_INTERRUPTS(CLBR_NONE) .if \trace TRACE_IRQS_OFF .endif @@ -807,9 +831,9 @@ paranoid_schedule\trace: .if \trace TRACE_IRQS_ON .endif - sti + ENABLE_INTERRUPTS(CLBR_ANY) call schedule - cli + DISABLE_INTERRUPTS(CLBR_ANY) .if \trace TRACE_IRQS_OFF .endif @@ -862,7 +886,7 @@ KPROBE_ENTRY(error_entry) testl $3,CS(%rsp) je error_kernelspace error_swapgs: - swapgs + SWAPGS error_sti: movq %rdi,RDI(%rsp) CFI_REL_OFFSET rdi,RDI @@ -874,7 +898,7 @@ error_sti: error_exit: movl %ebx,%eax RESTORE_REST - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax @@ -911,12 +935,12 @@ ENTRY(load_gs_index) CFI_STARTPROC pushf CFI_ADJUST_CFA_OFFSET 8 - cli - swapgs + DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) + SWAPGS gs_change: movl %edi,%gs 2: mfence /* workaround */ - swapgs + SWAPGS popf CFI_ADJUST_CFA_OFFSET -8 ret @@ -930,7 +954,7 @@ ENDPROC(load_gs_index) .section .fixup,"ax" /* running with kernelgs */ bad_gs: - swapgs /* switch back to user gs */ + SWAPGS /* switch back to user gs */ xorl %eax,%eax movl %eax,%gs jmp 2b -- 1.4.4.2 |
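Roughly what the new macros do, written as C for illustration only (the real code is assembly that paravirt patching may rewrite in place), assuming the pv_irq_ops layout used earlier in the series:

static inline void example_irq_disable(void)
{
        /* DISABLE_INTERRUPTS(CLBR_NONE) becomes an indirect call ... */
        pv_irq_ops.irq_disable();       /* ... whose native default is "cli" */
}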
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:39
|
This patch introduces, and patches callers when needed, native versions for read/write_crX functions, clts and wbinvd. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/mm/pageattr_64.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/arch/x86/mm/pageattr_64.c b/arch/x86/mm/pageattr_64.c index 14ab327..3a483a8 100644 --- a/arch/x86/mm/pageattr_64.c +++ b/arch/x86/mm/pageattr_64.c @@ -13,6 +13,7 @@ #include <asm/tlbflush.h> #include <asm/uaccess.h> #include <asm/io.h> +#include <asm/paravirt.h> pte_t *lookup_address(unsigned long address) { @@ -84,7 +85,7 @@ static void flush_kernel_map(void *arg) much cheaper than WBINVD. */ /* clflush is still broken. Disable for now. */ if (1 || !cpu_has_clflush) { - asm volatile("wbinvd" ::: "memory"); + wbinvd(); } else { list_for_each_entry(pg, l, lru) { void *addr = page_address(pg); -- 1.4.4.2 |
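A sketch of what call sites look like after this change (the example_* names are invented): the helpers compile to the bare instructions without CONFIG_PARAVIRT and to pv_cpu_ops calls with it, so callers no longer open-code the asm:

static void example_allow_fpu_use(void)
{
        clts();         /* clear CR0.TS so FPU instructions stop trapping */
}

static void example_forbid_fpu_use(void)
{
        stts();         /* write_cr0(8 | read_cr0()): set CR0.TS again */
}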
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:34
|
The interrupt initialization routine becomes native_init_IRQ and will be overridden later in case paravirt is on. The interrupt array is made visible to guests such as lguest, which will need to have their own initialization mechanism (though using most of the same irq lines) later on. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/kernel/i8259_64.c | 7 +++++-- include/asm-x86/irq_64.h | 3 +++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/i8259_64.c b/arch/x86/kernel/i8259_64.c index 3041e59..53955f4 100644 --- a/arch/x86/kernel/i8259_64.c +++ b/arch/x86/kernel/i8259_64.c @@ -77,7 +77,7 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) /* for the irq vectors */ -static void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { +void (*interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { IRQLIST_16(0x2), IRQLIST_16(0x3), IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), @@ -456,7 +456,10 @@ void __init init_ISA_irqs (void) } } -void __init init_IRQ(void) +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) { int i; diff --git a/include/asm-x86/irq_64.h b/include/asm-x86/irq_64.h index 5006c6e..4f02446 100644 --- a/include/asm-x86/irq_64.h +++ b/include/asm-x86/irq_64.h @@ -46,6 +46,9 @@ static __inline__ int irq_canonicalize(int irq) extern void fixup_irqs(cpumask_t map); #endif +#include <linux/init.h> +void native_init_IRQ(void); + #define __ARCH_HAS_DO_SOFTIRQ 1 #endif /* _ASM_IRQ_H */ -- 1.4.4.2 |
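The weak-alias pattern used above, shown in isolation (the foo names are illustrative): the bare-metal implementation keeps its native_ name, the generic entry point is only a weak alias to it, and any strong definition a paravirt guest links in wins automatically:

void native_init_foo(void)
{
        /* default, bare-metal setup */
}

/* replaced at link time by any non-weak init_foo() definition */
void init_foo(void) __attribute__((weak, alias("native_init_foo")));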
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:51
|
When paravirtualization is disabled, the kernel is always running at ring 0. So report it in the appropriate macro. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- include/asm-x86/segment_64.h | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/include/asm-x86/segment_64.h b/include/asm-x86/segment_64.h index 04b8ab2..240c1bf 100644 --- a/include/asm-x86/segment_64.h +++ b/include/asm-x86/segment_64.h @@ -50,4 +50,8 @@ #define GDT_SIZE (GDT_ENTRIES * 8) #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) +#ifndef CONFIG_PARAVIRT +#define get_kernel_rpl() 0 +#endif + #endif -- 1.4.4.2 |
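A sketch of the kind of test the macro is meant for (example_trapped_in_kernel is an invented name): privilege checks compare against get_kernel_rpl() rather than hard-coding ring 0, so the same source still works when a hypervisor runs the kernel at ring 1 or 3 and redefines the macro:

static inline int example_trapped_in_kernel(unsigned long cs)
{
        /* the low two bits of a selector are its privilege level */
        return (cs & 3) == get_kernel_rpl();
}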
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:44
|
Export math_state_restore symbol, so it can be used by hypervisors. They are commonly loaded as modules (lguest being an example). Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/kernel/traps_64.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 4d752a8..0876692 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -1069,6 +1069,7 @@ asmlinkage void math_state_restore(void) task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } +EXPORT_SYMBOL_GPL(math_state_restore); void __init trap_init(void) { -- 1.4.4.2 |
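An illustration of why the export matters (my_guest_* is an invented module function, not lguest code): a hypervisor built as a GPL module can intercept device-not-available traps on behalf of its guest and hand the actual FPU restore back to the core kernel:

static void my_guest_device_not_available(void)
{
        math_state_restore();   /* links only because the symbol is now exported */
}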
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:52
|
This patch goes one step forward in consolidating the msr.h header. It shares code between i386 and x86_64, instead of duplicating the code for tsc reading, msr reading/writing, etc. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/ia32/syscall32.c | 2 +- arch/x86/kernel/setup64.c | 6 +- arch/x86/kernel/tsc_64.c | 17 +++- arch/x86/kernel/vsyscall_64.c | 4 +- arch/x86/vdso/vgetcpu.c | 4 +- include/asm-x86/alternative_32.h | 17 +++- include/asm-x86/alternative_64.h | 27 ++++- include/asm-x86/msr.h | 225 ++++++++++---------------------------- include/asm-x86/tsc.h | 33 +++++- 9 files changed, 151 insertions(+), 184 deletions(-) diff --git a/arch/x86/ia32/syscall32.c b/arch/x86/ia32/syscall32.c index d751d96..a1247ed 100644 --- a/arch/x86/ia32/syscall32.c +++ b/arch/x86/ia32/syscall32.c @@ -82,5 +82,5 @@ void syscall32_cpu_init(void) checking_wrmsrl(MSR_IA32_SYSENTER_ESP, 0ULL); checking_wrmsrl(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); - wrmsrl(MSR_CSTAR, ia32_cstar_target); + wrmsrl(MSR_CSTAR, (u64)ia32_cstar_target); } diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 3558ac7..50b7514 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -122,7 +122,7 @@ void pda_init(int cpu) asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); /* Memory clobbers used to order PDA accessed */ mb(); - wrmsrl(MSR_GS_BASE, pda); + wrmsrl(MSR_GS_BASE, (u64)pda); mb(); pda->cpunumber = cpu; @@ -161,8 +161,8 @@ void syscall_init(void) * but only a 32bit target. LSTAR sets the 64bit rip. */ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); - wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); + wrmsrl(MSR_LSTAR, (u64)system_call); + wrmsrl(MSR_CSTAR, (u64)ignore_sysret); #ifdef CONFIG_IA32_EMULATION syscall32_cpu_init (); diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 9c70af4..4502539 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -30,7 +30,7 @@ static unsigned long long cycles_2_ns(unsigned long long cyc) return (cyc * cyc2ns_scale) >> NS_SCALE; } -unsigned long long sched_clock(void) +unsigned long long native_sched_clock(void) { unsigned long a = 0; @@ -44,6 +44,19 @@ unsigned long long sched_clock(void) return cycles_2_ns(a); } +/* We need to define a real function for sched_clock, to override the + weak default version */ +#ifdef CONFIG_PARAVIRT +unsigned long long sched_clock(void) +{ + return paravirt_sched_clock(); +} +#else +unsigned long long +sched_clock(void) __attribute__((alias("native_sched_clock"))); +#endif + + static int tsc_unstable; inline int check_tsc_unstable(void) @@ -256,7 +269,7 @@ static cycle_t read_tsc(void) static cycle_t __vsyscall_fn vread_tsc(void) { - cycle_t ret = (cycle_t)get_cycles_sync(); + cycle_t ret = (cycle_t)vget_cycles_sync(); return ret; } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index ad4005c..1425d02 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -190,7 +190,7 @@ time_t __vsyscall(1) vtime(time_t *t) long __vsyscall(2) vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) { - unsigned int dummy, p; + unsigned int p; unsigned long j = 0; /* Fast cache - only recompute value once per jiffies and avoid @@ -205,7 +205,7 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) p = tcache->blob[1]; } else if (__vgetcpu_mode == 
VGETCPU_RDTSCP) { /* Load per CPU data from RDTSCP */ - rdtscp(dummy, dummy, p); + native_read_tscp(&p); } else { /* Load per CPU data from GDT */ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c index 91f6e85..61d0def 100644 --- a/arch/x86/vdso/vgetcpu.c +++ b/arch/x86/vdso/vgetcpu.c @@ -15,7 +15,7 @@ long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) { - unsigned int dummy, p; + unsigned int p; unsigned long j = 0; /* Fast cache - only recompute value once per jiffies and avoid @@ -30,7 +30,7 @@ long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) p = tcache->blob[1]; } else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) { /* Load per CPU data from RDTSCP */ - rdtscp(dummy, dummy, p); + native_read_tscp(&p); } else { /* Load per CPU data from GDT */ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); diff --git a/include/asm-x86/alternative_32.h b/include/asm-x86/alternative_32.h index bda6c81..1ed7708 100644 --- a/include/asm-x86/alternative_32.h +++ b/include/asm-x86/alternative_32.h @@ -101,7 +101,22 @@ static inline void alternatives_smp_switch(int smp) {} * use this macro(s) if you need more than one output parameter * in alternative_io */ -#define ASM_OUTPUT2(a, b) a, b +#define ASM_OUTPUT2(a, b...) a, b + +#define fixup_section(code, fixup, output, input...) \ + asm volatile("2: " code "\n" \ + "1:\n\t" \ + ".section .fixup,\"ax\"\n\t" \ + "3: " fixup "\n\t" \ + "jmp 1b\n\t" \ + ".previous\n\t" \ + ".section __ex_table,\"a\"\n" \ + " .align 4\n\t" \ + " .long 2b,3b\n\t" \ + ".previous" \ + : output \ + : input) + /* * Alternative inline assembly for SMP. diff --git a/include/asm-x86/alternative_64.h b/include/asm-x86/alternative_64.h index ab161e8..f080b69 100644 --- a/include/asm-x86/alternative_64.h +++ b/include/asm-x86/alternative_64.h @@ -141,14 +141,29 @@ static inline void alternatives_smp_switch(int smp) {} * use this macro(s) if you need more than one output parameter * in alternative_io */ -#define ASM_OUTPUT2(a, b) a, b - -struct paravirt_patch; +#define ASM_OUTPUT2(a, b...) a, b + +#define fixup_section(code, fixup, output, input...) 
\ + asm volatile("2: " code "\n" \ + "1:\n\t" \ + ".section .fixup,\"ax\"\n\t" \ + "3: " fixup "\n\t" \ + " jmp 1b\n\t" \ + ".previous\n\t" \ + ".section __ex_table,\"a\"\n" \ + " .align 8\n\t" \ + " .quad 2b,3b\n\t" \ + ".previous" \ + : output \ + : input) + +struct paravirt_patch_site; #ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end); +void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end); #else -static inline void -apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) +static inline void apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) {} #define __parainstructions NULL #define __parainstructions_end NULL diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h index 48f73c7..9171564 100644 --- a/include/asm-x86/msr.h +++ b/include/asm-x86/msr.h @@ -3,8 +3,6 @@ #include <asm/msr-index.h> -#ifdef __i386__ - #ifdef __KERNEL__ #ifndef __ASSEMBLY__ @@ -12,70 +10,66 @@ static inline unsigned long long native_read_msr(unsigned int msr) { - unsigned long long val; - - asm volatile("rdmsr" : "=A" (val) : "c" (msr)); - return val; + unsigned long a, d; + asm volatile("rdmsr" : "=a" (a), "=d" (d) : "c" (msr)); + return a | ((u64)d << 32); } static inline unsigned long long native_read_msr_safe(unsigned int msr, int *err) { - unsigned long long val; - - asm volatile("2: rdmsr ; xorl %0,%0\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: movl %3,%0 ; jmp 1b\n\t" - ".previous\n\t" - ".section __ex_table,\"a\"\n" - " .align 4\n\t" - " .long 2b,3b\n\t" - ".previous" - : "=r" (*err), "=A" (val) - : "c" (msr), "i" (-EFAULT)); - - return val; + unsigned long a, d; + fixup_section("rdmsr; xor %0, %0", "mov %4, %0", + ASM_OUTPUT2("=r" (*err), "=a"((a)), "=d"((d))), + "c"(msr), "i"(-EFAULT), "0"(0)); + return a | ((u64)d << 32); } -static inline void native_write_msr(unsigned int msr, unsigned long long val) +static inline void native_write_msr(unsigned int msr, unsigned low, + unsigned high) { - asm volatile("wrmsr" : : "c" (msr), "A"(val)); + asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high)); } static inline int native_write_msr_safe(unsigned int msr, - unsigned long long val) + unsigned low, unsigned high) { int err; - asm volatile("2: wrmsr ; xorl %0,%0\n" - "1:\n\t" - ".section .fixup,\"ax\"\n\t" - "3: movl %4,%0 ; jmp 1b\n\t" - ".previous\n\t" - ".section __ex_table,\"a\"\n" - " .align 4\n\t" - " .long 2b,3b\n\t" - ".previous" - : "=a" (err) - : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), - "i" (-EFAULT)); + fixup_section("wrmsr; xor %0, %0", "mov %4, %0", "=a" (err), + "c" (msr), "0" (low), "d" (high), + "i" (-EFAULT)); return err; } static inline unsigned long long native_read_tsc(void) { - unsigned long long val; - asm volatile("rdtsc" : "=A" (val)); - return val; + unsigned int low, high; + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + return low | ((u64)(high) << 32); } -static inline unsigned long long native_read_pmc(void) +static inline unsigned long long native_read_pmc(int counter) { - unsigned long long val; - asm volatile("rdpmc" : "=A" (val)); - return val; + unsigned long low, high; + asm volatile ("rdpmc" + : "=a" (low), "=d" (high) + : "c" (counter)); + + return low | ((u64)high << 32); } +static inline unsigned long long native_read_tscp(int *aux) +{ + unsigned long low, high; + asm volatile (".byte 0x0f,0x01,0xf9" + : "=a" (low), "=d" (high), "=c" (*aux)); + return low | ((u64)high >> 32); +} + +#endif /* ! 
__ASSEMBLY__ */ +#endif /* __KERNEL__ */ + +#ifndef __ASSEMBLY__ #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else @@ -93,20 +87,26 @@ static inline unsigned long long native_read_pmc(void) (val2) = (u32)(__val >> 32); \ } while(0) -static inline void wrmsr(u32 __msr, u32 __low, u32 __high) +static inline void wrmsr(unsigned int msr, unsigned int low, unsigned int high) { - native_write_msr(__msr, ((u64)__high << 32) | __low); + native_write_msr(msr, low, high); } #define rdmsrl(msr,val) \ ((val) = native_read_msr(msr)) -#define wrmsrl(msr,val) native_write_msr(msr, val) +static inline void wrmsrl(unsigned int msr, unsigned long long val) +{ + unsigned long low, high; + low = (u32)val; + high = val >> 32; + native_write_msr(msr, low, high); +} /* wrmsr with exception handling */ -static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high) +static inline int wrmsr_safe(int msr, int low, int high) { - return native_write_msr_safe(__msr, ((u64)__high << 32) | __low); + return native_write_msr_safe(msr, low, high); } /* rdmsr with exception handling */ @@ -129,130 +129,28 @@ static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high) #define rdpmc(counter,low,high) \ do { \ - u64 _l = native_read_pmc(); \ + u64 _l = native_read_pmc(counter); \ (low) = (u32)_l; \ (high) = (u32)(_l >> 32); \ - } while(0) -#endif /* !CONFIG_PARAVIRT */ - -#ifdef CONFIG_SMP -void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); -void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); -int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -#else /* CONFIG_SMP */ -static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - rdmsr(msr_no, *l, *h); -} -static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - wrmsr(msr_no, l, h); -} -static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - return rdmsr_safe(msr_no, l, h); -} -static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - return wrmsr_safe(msr_no, l, h); -} -#endif /* CONFIG_SMP */ -#endif /* ! 
__ASSEMBLY__ */ -#endif /* __KERNEL__ */ - -#else /* __i386__ */ - -#ifndef __ASSEMBLY__ -#include <linux/errno.h> -/* - * Access to machine-specific registers (available on 586 and better only) - * Note: the rd* operations modify the parameters directly (without using - * pointer indirection), this allows gcc to optimize better - */ + } while (0) -#define rdmsr(msr,val1,val2) \ - __asm__ __volatile__("rdmsr" \ - : "=a" (val1), "=d" (val2) \ - : "c" (msr)) - - -#define rdmsrl(msr,val) do { unsigned long a__,b__; \ - __asm__ __volatile__("rdmsr" \ - : "=a" (a__), "=d" (b__) \ - : "c" (msr)); \ - val = a__ | (b__<<32); \ -} while(0) - -#define wrmsr(msr,val1,val2) \ - __asm__ __volatile__("wrmsr" \ - : /* no outputs */ \ - : "c" (msr), "a" (val1), "d" (val2)) +#define rdtscp(low, high, aux) \ + do { \ + unsigned long long _val = native_read_tscp(&(aux)); \ + (low) = (u32)_val; \ + (high) = (u32)(_val >> 32); \ + } while (0) -#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32) +#define rdtscpll(val, aux) (val) = native_read_tscp(&(aux)) -/* wrmsr with exception handling */ -#define wrmsr_safe(msr,a,b) ({ int ret__; \ - asm volatile("2: wrmsr ; xorl %0,%0\n" \ - "1:\n\t" \ - ".section .fixup,\"ax\"\n\t" \ - "3: movl %4,%0 ; jmp 1b\n\t" \ - ".previous\n\t" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n\t" \ - " .quad 2b,3b\n\t" \ - ".previous" \ - : "=a" (ret__) \ - : "c" (msr), "0" (a), "d" (b), "i" (-EFAULT)); \ - ret__; }) +#endif /* !CONFIG_PARAVIRT */ #define checking_wrmsrl(msr,val) wrmsr_safe(msr,(u32)(val),(u32)((val)>>32)) -#define rdmsr_safe(msr,a,b) \ - ({ int ret__; \ - asm volatile ("1: rdmsr\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %4,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - ".section __ex_table,\"a\"\n" \ - " .align 8\n" \ - " .quad 1b,3b\n" \ - ".previous":"=&bDS" (ret__), "=a"(*(a)), "=d"(*(b)) \ - :"c"(msr), "i"(-EIO), "0"(0)); \ - ret__; }) - -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) - -#define rdtscl(low) \ - __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx") - -#define rdtscp(low,high,aux) \ - asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux)) - -#define rdtscll(val) do { \ - unsigned int __a,__d; \ - asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ - (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ -} while(0) - -#define rdtscpll(val, aux) do { \ - unsigned long __a, __d; \ - asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \ - (val) = (__d << 32) | __a; \ -} while (0) - #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) #define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0) -#define rdpmc(counter,low,high) \ - __asm__ __volatile__("rdpmc" \ - : "=a" (low), "=d" (high) \ - : "c" (counter)) - #ifdef CONFIG_SMP void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); @@ -275,9 +173,6 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { return wrmsr_safe(msr_no, l, h); } -#endif /* CONFIG_SMP */ -#endif /* __ASSEMBLY__ */ - -#endif /* !__i386__ */ - +#endif /* CONFIG_SMP */ +#endif /* ! __ASSEMBLY__ */ #endif diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index d7b1c4e..651e6ac 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -33,7 +33,7 @@ static inline cycles_t get_cycles(void) } /* Like get_cycles, but make sure the CPU is synchronized. 
*/ -static __always_inline cycles_t get_cycles_sync(void) +static __always_inline cycles_t __get_cycles_sync(void) { unsigned long long ret; unsigned eax, edx; @@ -55,11 +55,40 @@ static __always_inline cycles_t get_cycles_sync(void) */ alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC, "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); - rdtscll(ret); + return 0; +} + +static __always_inline cycles_t get_cycles_sync(void) +{ + unsigned long long ret; + ret = __get_cycles_sync(); + if (!ret) + rdtscll(ret); return ret; } +#ifdef CONFIG_PARAVIRT +/* + * For paravirt guests, some functionalities are executed through function + * pointers in the various pvops structures. + * These function pointers exist inside the kernel and can not + * be accessed by user space. To avoid this, we make a copy of the + * get_cycles_sync (called in kernel) but force the use of native_read_tsc. + * Ideally, the guest should set up it's own clock and vread + */ +static __always_inline long long vget_cycles_sync(void) +{ + unsigned long long ret; + ret = __get_cycles_sync(); + if (!ret) + ret = native_read_tsc(); + return ret; +} +#else +# define vget_cycles_sync() get_cycles_sync() +#endif + extern void tsc_init(void); extern void mark_tsc_unstable(char *reason); extern int unsynchronized_tsc(void); -- 1.4.4.2 |
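A usage sketch for the consolidated MSR helpers (EFER.NXE is used only as a familiar example; it is not part of the patch): the same rdmsrl()/wrmsrl() source now builds on both i386 and x86_64, with the low/high 32-bit split handled inside the helpers:

#include <asm/msr.h>

static void example_enable_nx(void)
{
        unsigned long long efer;

        rdmsrl(MSR_EFER, efer);
        wrmsrl(MSR_EFER, efer | (1ULL << 11));  /* EFER.NXE is bit 11 */
}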
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:30
|
This patch consolidates system.h header. For i386, it adds functions read/write_cr8 that ain't really needed, but will also not hurt. If they are used somewhere in i386 code, there's a bug anyway, and should be fixed. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- include/asm-x86/system.h | 134 +++++++++++++++++++++++++++++++++++++++++++ include/asm-x86/system_32.h | 102 -------------------------------- include/asm-x86/system_64.h | 77 ------------------------- 3 files changed, 134 insertions(+), 179 deletions(-) diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index 692562b..ef20916 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -1,5 +1,139 @@ +#ifndef _X86_SYSTEM_H_ +#define _X86_SYSTEM_H_ + +#include <linux/kernel.h> + +static inline void native_clts(void) +{ + asm volatile ("clts"); +} + +/* + * Volatile isn't enough to prevent the compiler from reordering the + * read/write functions for the control registers and messing everything up. + * A memory clobber would solve the problem, but would prevent reordering of + * all loads stores around it, which can hurt performance. Solution is to + * use a variable and mimic reads and writes to it to enforce serialization + */ +static unsigned long __force_order; + +static inline unsigned long native_read_cr0(void) +{ + unsigned long val; + asm volatile("mov %%cr0,%0\n\t" :"=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr0(unsigned long val) +{ + asm volatile("mov %0,%%cr0": :"r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr2(void) +{ + unsigned long val; + asm volatile("mov %%cr2,%0\n\t" :"=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr2(unsigned long val) +{ + asm volatile("mov %0,%%cr2": :"r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr3(void) +{ + unsigned long val; + asm volatile("mov %%cr3,%0\n\t" :"=r" (val), "=m" (__force_order)); + return val; +} + +static inline void native_write_cr3(unsigned long val) +{ + asm volatile("mov %0,%%cr3": :"r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr4(void) +{ + unsigned long val; + asm volatile("mov %%cr4,%0\n\t" :"=r" (val), "=m" (__force_order)); + return val; +} + +static inline unsigned long native_read_cr4_safe(void) +{ + unsigned long val; + /* This could fault if %cr4 does not exist. In x86_64, a cr4 always + * exists, so it will never fail. 
*/ +#ifdef CONFIG_X86_32 + asm volatile("1: mov %%cr4, %0 \n" + "2: \n" + ".section __ex_table,\"a\" \n" + ".long 1b,2b \n" + ".previous \n" + : "=r" (val), "=m" (__force_order) : "0" (0)); +#else + val = native_read_cr4(); +#endif + return val; +} + +static inline void native_write_cr4(unsigned long val) +{ + asm volatile("mov %0,%%cr4": :"r" (val), "m" (__force_order)); +} + +static inline unsigned long native_read_cr8(void) +{ + unsigned long cr8; + asm volatile("mov %%cr8,%0" : "=r" (cr8), "=m" (__force_order)); + return cr8; +} + +static inline void native_write_cr8(unsigned long val) +{ + asm volatile("mov %0,%%cr8" : : "r" (val)); +} + +static inline void native_wbinvd(void) +{ + asm volatile("wbinvd": : :"memory"); +} + +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(char __force *)__p)); +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define read_cr0() (native_read_cr0()) +#define write_cr0(x) (native_write_cr0(x)) +#define read_cr2() (native_read_cr2()) +#define write_cr2(x) (native_write_cr2(x)) +#define read_cr3() (native_read_cr3()) +#define write_cr3(x) (native_write_cr3(x)) +#define read_cr4() (native_read_cr4()) +#define read_cr4_safe() (native_read_cr4_safe()) +#define write_cr4(x) (native_write_cr4(x)) +#define read_cr8() (native_read_cr8()) +#define write_cr8(x) (native_write_cr8(x)) +#define wbinvd() (native_wbinvd()) + +/* Clear the 'TS' bit */ +#define clts() (native_clts()) + +#endif/* CONFIG_PARAVIRT */ + +#define stts() write_cr0(8 | read_cr0()) + +#define nop() __asm__ __volatile__ ("nop") + #ifdef CONFIG_X86_32 # include "system_32.h" #else # include "system_64.h" #endif + +#endif diff --git a/include/asm-x86/system_32.h b/include/asm-x86/system_32.h index db6283e..26fc8f5 100644 --- a/include/asm-x86/system_32.h +++ b/include/asm-x86/system_32.h @@ -1,7 +1,6 @@ #ifndef __ASM_SYSTEM_H #define __ASM_SYSTEM_H -#include <linux/kernel.h> #include <asm/segment.h> #include <asm/cpufeature.h> #include <asm/cmpxchg.h> @@ -89,105 +88,6 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t" \ #define savesegment(seg, value) \ asm volatile("mov %%" #seg ",%0":"=rm" (value)) - -static inline void native_clts(void) -{ - asm volatile ("clts"); -} - -static inline unsigned long native_read_cr0(void) -{ - unsigned long val; - asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); - return val; -} - -static inline void native_write_cr0(unsigned long val) -{ - asm volatile("movl %0,%%cr0": :"r" (val)); -} - -static inline unsigned long native_read_cr2(void) -{ - unsigned long val; - asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); - return val; -} - -static inline void native_write_cr2(unsigned long val) -{ - asm volatile("movl %0,%%cr2": :"r" (val)); -} - -static inline unsigned long native_read_cr3(void) -{ - unsigned long val; - asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); - return val; -} - -static inline void native_write_cr3(unsigned long val) -{ - asm volatile("movl %0,%%cr3": :"r" (val)); -} - -static inline unsigned long native_read_cr4(void) -{ - unsigned long val; - asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); - return val; -} - -static inline unsigned long native_read_cr4_safe(void) -{ - unsigned long val; - /* This could fault if %cr4 does not exist */ - asm volatile("1: movl %%cr4, %0 \n" - "2: \n" - ".section __ex_table,\"a\" \n" - ".long 1b,2b \n" - ".previous \n" - : "=r" (val): "0" (0)); - return val; -} - -static inline void native_write_cr4(unsigned long val) -{ - asm volatile("movl %0,%%cr4": :"r" (val)); -} - -static 
inline void native_wbinvd(void) -{ - asm volatile("wbinvd": : :"memory"); -} - -static inline void clflush(volatile void *__p) -{ - asm volatile("clflush %0" : "+m" (*(char __force *)__p)); -} - -#ifdef CONFIG_PARAVIRT -#include <asm/paravirt.h> -#else -#define read_cr0() (native_read_cr0()) -#define write_cr0(x) (native_write_cr0(x)) -#define read_cr2() (native_read_cr2()) -#define write_cr2(x) (native_write_cr2(x)) -#define read_cr3() (native_read_cr3()) -#define write_cr3(x) (native_write_cr3(x)) -#define read_cr4() (native_read_cr4()) -#define read_cr4_safe() (native_read_cr4_safe()) -#define write_cr4(x) (native_write_cr4(x)) -#define wbinvd() (native_wbinvd()) - -/* Clear the 'TS' bit */ -#define clts() (native_clts()) - -#endif/* CONFIG_PARAVIRT */ - -/* Set the 'TS' bit */ -#define stts() write_cr0(8 | read_cr0()) - #endif /* __KERNEL__ */ static inline unsigned long get_limit(unsigned long segment) @@ -198,8 +98,6 @@ static inline unsigned long get_limit(unsigned long segment) return __limit+1; } -#define nop() __asm__ __volatile__ ("nop") - /* * Force strict CPU ordering. * And yes, this is required on UP too when we're talking diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h index 4cb2384..84a8f10 100644 --- a/include/asm-x86/system_64.h +++ b/include/asm-x86/system_64.h @@ -1,7 +1,6 @@ #ifndef __ASM_SYSTEM_H #define __ASM_SYSTEM_H -#include <linux/kernel.h> #include <asm/segment.h> #include <asm/cmpxchg.h> @@ -62,85 +61,9 @@ extern void load_gs_index(unsigned); ".previous" \ : :"r" (value), "r" (0)) -/* - * Clear and set 'TS' bit respectively - */ -#define clts() __asm__ __volatile__ ("clts") - -static inline unsigned long read_cr0(void) -{ - unsigned long cr0; - asm volatile("movq %%cr0,%0" : "=r" (cr0)); - return cr0; -} - -static inline void write_cr0(unsigned long val) -{ - asm volatile("movq %0,%%cr0" :: "r" (val)); -} - -static inline unsigned long read_cr2(void) -{ - unsigned long cr2; - asm volatile("movq %%cr2,%0" : "=r" (cr2)); - return cr2; -} - -static inline void write_cr2(unsigned long val) -{ - asm volatile("movq %0,%%cr2" :: "r" (val)); -} - -static inline unsigned long read_cr3(void) -{ - unsigned long cr3; - asm volatile("movq %%cr3,%0" : "=r" (cr3)); - return cr3; -} - -static inline void write_cr3(unsigned long val) -{ - asm volatile("movq %0,%%cr3" :: "r" (val) : "memory"); -} - -static inline unsigned long read_cr4(void) -{ - unsigned long cr4; - asm volatile("movq %%cr4,%0" : "=r" (cr4)); - return cr4; -} - -static inline void write_cr4(unsigned long val) -{ - asm volatile("movq %0,%%cr4" :: "r" (val) : "memory"); -} - -static inline unsigned long read_cr8(void) -{ - unsigned long cr8; - asm volatile("movq %%cr8,%0" : "=r" (cr8)); - return cr8; -} - -static inline void write_cr8(unsigned long val) -{ - asm volatile("movq %0,%%cr8" :: "r" (val) : "memory"); -} - -#define stts() write_cr0(8 | read_cr0()) - -#define wbinvd() \ - __asm__ __volatile__ ("wbinvd": : :"memory") #endif /* __KERNEL__ */ -static inline void clflush(volatile void *__p) -{ - asm volatile("clflush %0" : "+m" (*(char __force *)__p)); -} - -#define nop() __asm__ __volatile__ ("nop") - #ifdef CONFIG_SMP #define smp_mb() mb() #define smp_rmb() barrier() -- 1.4.4.2 |
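With the consolidated accessors, the open-coded cr4 assembly that set_in_cr4() carried on x86_64 becomes plain C on both architectures; a minimal sketch (example_set_in_cr4 is illustrative):

static inline void example_set_in_cr4(unsigned long mask)
{
        /* native_read_cr4()/native_write_cr4(), or the paravirt
         * hooks when CONFIG_PARAVIRT is enabled */
        write_cr4(read_cr4() | mask);
}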
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:29
|
Among other things, using -traditional as a gcc option stops us from using macro token pasting, which is a feature we heavily rely on. There was still a use of -traditional in arch/x86/kernel/Makefile_64, which this patch removes. I don't see any problems building kernels in my x86_64 box without -traditional. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/kernel/Makefile_64 | 1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index ffee997..0714528 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -3,7 +3,6 @@ # extra-y := head_64.o head64.o init_task.o vmlinux.lds -EXTRA_AFLAGS := -traditional obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ ptrace_64.o time_64.o ioport_64.o ldt.o setup_64.o i8259_64.o sys_x86_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ -- 1.4.4.2 |
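A minimal example of the macro token pasting that -traditional rules out; the BUILD_16_IRQS/IRQLIST_16 macros touched a few patches back depend on exactly this:

#define MAKE_HANDLER(n) void irq_handler_##n(void)

MAKE_HANDLER(0);        /* expands to: void irq_handler_0(void); */
MAKE_HANDLER(1);        /* expands to: void irq_handler_1(void); */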
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:28:56
|
This patch turns the set_p{te,md,ud,gd} functions into their native_ versions. There is no need to patch any caller. Also, it adds pte_update() and pte_update_defer() calls whenever we modify a page table entry. This last part was coded to match i386 as close as possible. Pieces of the header are moved to below the #ifdef CONFIG_PARAVIRT site, as they are users of the newly defined set_* macros. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- include/asm-x86/pgtable_64.h | 192 ++++++++++++++++++++++++++++-------------- 1 files changed, 128 insertions(+), 64 deletions(-) diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index 9b0ff47..592d613 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -57,56 +57,107 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; */ #define PTRS_PER_PTE 512 -#ifndef __ASSEMBLY__ +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else + +#define set_pte native_set_pte +#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval) +#define set_pmd native_set_pmd +#define set_pud native_set_pud +#define set_pgd native_set_pgd +#define pte_clear(mm, addr, xp) \ +do { \ + set_pte_at(mm, addr, xp, __pte(0)); \ +} while (0) -#define pte_ERROR(e) \ - printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e)) -#define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), pmd_val(e)) -#define pud_ERROR(e) \ - printk("%s:%d: bad pud %p(%016lx).\n", __FILE__, __LINE__, &(e), pud_val(e)) -#define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), pgd_val(e)) +#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) +#define pud_clear native_pud_clear +#define pgd_clear native_pgd_clear +#define pte_update(mm, addr, ptep) do { } while (0) +#define pte_update_defer(mm, addr, ptep) do { } while (0) -#define pgd_none(x) (!pgd_val(x)) -#define pud_none(x) (!pud_val(x)) +#endif -static inline void set_pte(pte_t *dst, pte_t val) +#ifndef __ASSEMBLY__ + +static inline void native_set_pte(pte_t *dst, pte_t val) { - pte_val(*dst) = pte_val(val); + dst->pte = pte_val(val); } -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) -static inline void set_pmd(pmd_t *dst, pmd_t val) +static inline void native_set_pmd(pmd_t *dst, pmd_t val) { - pmd_val(*dst) = pmd_val(val); + dst->pmd = pmd_val(val); } -static inline void set_pud(pud_t *dst, pud_t val) +static inline void native_set_pud(pud_t *dst, pud_t val) { - pud_val(*dst) = pud_val(val); + dst->pud = pud_val(val); } -static inline void pud_clear (pud_t *pud) +static inline void native_set_pgd(pgd_t *dst, pgd_t val) { - set_pud(pud, __pud(0)); + dst->pgd = pgd_val(val); } -static inline void set_pgd(pgd_t *dst, pgd_t val) +static inline void native_pud_clear(pud_t *pud) { - pgd_val(*dst) = pgd_val(val); -} + set_pud(pud, __pud(0)); +} -static inline void pgd_clear (pgd_t * pgd) +static inline void native_pgd_clear(pgd_t *pgd) { set_pgd(pgd, __pgd(0)); } -#define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0)) +static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval) +{ + native_set_pte(ptep, pteval); +} + +static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + native_set_pte_at(mm, addr, ptep, __pte(0)); +} + +static inline void native_pmd_clear(pmd_t *pmd) +{ + 
native_set_pmd(pmd, __pmd(0)); +} + + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %p(%016llx).\n", \ + __FILE__, __LINE__, &(e), (u64)pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %p(%016llx).\n", \ + __FILE__, __LINE__, &(e), (u64)pmd_val(e)) +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %p(%016llx).\n", \ + __FILE__, __LINE__, &(e), (u64)pud_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %p(%016llx).\n", \ + __FILE__, __LINE__, &(e), (u64)pgd_val(e)) + +#define pgd_none(x) (!pgd_val(x)) +#define pud_none(x) (!pud_val(x)) struct mm_struct; -static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = __pte(xchg(&ptep->pte, 0)); + pte_update(mm, addr, ptep); + return pte; +} + +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + int full) { pte_t pte; if (full) { @@ -246,7 +297,6 @@ static inline unsigned long pmd_bad(pmd_t pmd) #define pte_none(x) (!pte_val(x)) #define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE)) -#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */ @@ -255,11 +305,11 @@ static inline unsigned long pmd_bad(pmd_t pmd) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { - pte_t pte; - pte_val(pte) = (page_nr << PAGE_SHIFT); - pte_val(pte) |= pgprot_val(pgprot); - pte_val(pte) &= __supported_pte_mask; - return pte; + unsigned long pte; + pte = (page_nr << PAGE_SHIFT); + pte |= pgprot_val(pgprot); + pte &= __supported_pte_mask; + return __pte(pte); } /* @@ -283,30 +333,6 @@ static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; } static inline pte_t pte_clrhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_PSE)); return pte; } -struct vm_area_struct; - -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) -{ - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte); -} - -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - clear_bit(_PAGE_BIT_RW, &ptep->pte); -} - -/* - * Macro to mark a page protection value as "uncacheable". - */ -#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) - -static inline int pmd_large(pmd_t pte) { - return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE; -} - - /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. 
@@ -340,7 +366,6 @@ static inline int pmd_large(pmd_t pte) { pmd_index(address)) #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) #define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) @@ -352,15 +377,53 @@ static inline int pmd_large(pmd_t pte) { /* page, protection -> pte */ #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) -#define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) - + +static inline pte_t __mk_pte_huge(pte_t entry) +{ + unsigned long pte; + pte = pte_val(entry); + pte |= _PAGE_PRESENT | _PAGE_PSE; + return __pte(pte); +} +#define mk_pte_huge(entry) ((entry) = __mk_pte_huge(entry)) + +#include <linux/mm_types.h> +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + int ret = 0; + if (!pte_young(*ptep)) + return 0; + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte); + pte_update(vma->vm_mm, addr, ptep); + return ret; +} + +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + clear_bit(_PAGE_BIT_RW, &ptep->pte); + pte_update(mm, addr, ptep); +} + +/* + * Macro to mark a page protection value as "uncacheable". + */ +#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) + +static inline int pmd_large(pmd_t pte) +{ + return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE; +} + /* Change flags of a PTE */ -static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) +static inline pte_t pte_modify(pte_t pte_old, pgprot_t newprot) { - pte_val(pte) &= _PAGE_CHG_MASK; - pte_val(pte) |= pgprot_val(newprot); - pte_val(pte) &= __supported_pte_mask; - return pte; + unsigned long pte = pte_val(pte_old); + pte &= _PAGE_CHG_MASK; + pte |= pgprot_val(newprot); + pte &= __supported_pte_mask; + return __pte(pte); } #define pte_index(address) \ @@ -387,6 +450,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) int __changed = !pte_same(*(__ptep), __entry); \ if (__changed && __dirty) { \ set_pte(__ptep, __entry); \ + pte_update_defer((__vma)->vm_mm, (__address), (__ptep)); \ flush_tlb_page(__vma, __address); \ } \ __changed; \ -- 1.4.4.2 |
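A note for readers tracking the pvops side: when CONFIG_PARAVIRT is enabled, the set_*/pte_update* names above are meant to come from paravirt.h and dispatch through the mmu ops table instead of being aliased to the native_ functions. A minimal sketch of that counterpart, assuming an i386-style pv_mmu_ops layout (hook names and the plain function-pointer calls are illustrative; the real header goes through the PVOP call macros):

struct pv_mmu_ops {
        void (*set_pte)(pte_t *ptep, pte_t pteval);
        void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, pte_t pteval);
        void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
        void (*pte_update)(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep);
        void (*pte_update_defer)(struct mm_struct *mm, unsigned long addr,
                                 pte_t *ptep);
        /* ... set_pud, set_pgd, pte_clear, pmd_clear, ... */
};
extern struct pv_mmu_ops pv_mmu_ops;

static inline void set_pte(pte_t *ptep, pte_t pteval)
{
        /* a hypervisor backend can interpose here; the native backend
         * simply points this hook at native_set_pte() */
        pv_mmu_ops.set_pte(ptep, pteval);
}

static inline void pte_update(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
{
        pv_mmu_ops.pte_update(mm, addr, ptep);
}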
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:29:17
|
This patch turns the basic descriptor handling into native_ functions. It is basically write_idt, load_idt, write_gdt, load_gdt, set_ldt, store_tr, load_tls, and the ones for updating a single entry. In the process of doing that, we change the definition of load_LDT_nolock, and caller sites have to be patched. We also patch call sites that now needs a typecast. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- include/asm-x86/desc.h | 59 ++++++++++++ include/asm-x86/desc_32.h | 45 --------- include/asm-x86/desc_64.h | 191 ++++++++++++++++++------------------- include/asm-x86/mmu_context_64.h | 23 ++++- 4 files changed, 169 insertions(+), 149 deletions(-) diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index 6065c50..276dc6e 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -1,5 +1,64 @@ +#ifndef _ASM_DESC_H_ +#define _ASM_DESC_H_ + #ifdef CONFIG_X86_32 # include "desc_32.h" #else # include "desc_64.h" #endif + +#ifndef __ASSEMBLY__ +#define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +#define LDT_entry_b(info) \ + (((info)->base_addr & 0xff000000) | \ + (((info)->base_addr & 0x00ff0000) >> 16) | \ + ((info)->limit & 0xf0000) | \ + (((info)->read_exec_only ^ 1) << 9) | \ + ((info)->contents << 10) | \ + (((info)->seg_not_present ^ 1) << 15) | \ + ((info)->seg_32bit << 22) | \ + ((info)->limit_in_pages << 23) | \ + ((info)->useable << 20) | \ + 0x7000) + +#define _LDT_empty(info) (\ + (info)->base_addr == 0 && \ + (info)->limit == 0 && \ + (info)->contents == 0 && \ + (info)->read_exec_only == 1 && \ + (info)->seg_32bit == 0 && \ + (info)->limit_in_pages == 0 && \ + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +#ifdef CONFIG_X86_64 +#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) +#else +#define LDT_empty(info) _LDT_empty(info) +#endif + +static inline void clear_LDT(void) +{ + set_ldt(NULL, 0); +} + +/* + * load one particular LDT into the current CPU + */ +static inline void load_LDT_nolock(mm_context_t *pc) +{ + set_ldt(pc->ldt, pc->size); +} + +static inline void load_LDT(mm_context_t *pc) +{ + preempt_disable(); + load_LDT_nolock(pc); + preempt_enable(); +} + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/include/asm-x86/desc_32.h b/include/asm-x86/desc_32.h index c547403..84bb843 100644 --- a/include/asm-x86/desc_32.h +++ b/include/asm-x86/desc_32.h @@ -162,51 +162,6 @@ static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const vo #define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) -#define LDT_entry_a(info) \ - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) - -#define LDT_entry_b(info) \ - (((info)->base_addr & 0xff000000) | \ - (((info)->base_addr & 0x00ff0000) >> 16) | \ - ((info)->limit & 0xf0000) | \ - (((info)->read_exec_only ^ 1) << 9) | \ - ((info)->contents << 10) | \ - (((info)->seg_not_present ^ 1) << 15) | \ - ((info)->seg_32bit << 22) | \ - ((info)->limit_in_pages << 23) | \ - ((info)->useable << 20) | \ - 0x7000) - -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 ) - -static inline void clear_LDT(void) -{ - set_ldt(NULL, 0); -} - -/* - * load one particular LDT into the current CPU 
- */ -static inline void load_LDT_nolock(mm_context_t *pc) -{ - set_ldt(pc->ldt, pc->size); -} - -static inline void load_LDT(mm_context_t *pc) -{ - preempt_disable(); - load_LDT_nolock(pc); - preempt_enable(); -} - static inline unsigned long get_desc_base(unsigned long *desc) { unsigned long base; diff --git a/include/asm-x86/desc_64.h b/include/asm-x86/desc_64.h index 7d48df7..d12cd07 100644 --- a/include/asm-x86/desc_64.h +++ b/include/asm-x86/desc_64.h @@ -16,11 +16,12 @@ extern struct desc_struct cpu_gdt_table[GDT_ENTRIES]; -#define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8)) -#define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8)) -#define clear_LDT() asm volatile("lldt %w0"::"r" (0)) +static inline void native_load_tr_desc(void) +{ + asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8)); +} -static inline unsigned long __store_tr(void) +static inline unsigned long native_store_tr(void) { unsigned long tr; @@ -28,8 +29,6 @@ static inline unsigned long __store_tr(void) return tr; } -#define store_tr(tr) (tr) = __store_tr() - /* * This is the ldt that every process will get unless we need * something other than this. @@ -38,79 +37,54 @@ extern struct desc_struct default_ldt[]; extern struct gate_struct idt_table[]; extern struct desc_ptr cpu_gdt_descr[]; -static inline void write_ldt_entry(struct desc_struct *ldt, - int entry, u32 entry_low, u32 entry_high) +static inline void write_dt_entry(struct desc_struct *dt, int entry, + u32 entry_low, u32 entry_high) { - __u32 *lp = (__u32 *)((entry << 3) + (char *)ldt); - - lp[0] = entry_low; - lp[1] = entry_high; + ((struct n_desc_struct *)dt)[entry].a = entry_low; + ((struct n_desc_struct *)dt)[entry].b = entry_high; } /* the cpu gdt accessor */ #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) -static inline void load_gdt(const struct desc_ptr *ptr) +static inline void native_load_gdt(const struct desc_ptr *ptr) { asm volatile("lgdt %w0"::"m" (*ptr)); } -static inline void store_gdt(struct desc_ptr *ptr) +static inline void native_store_gdt(struct desc_ptr *ptr) { asm("sgdt %w0":"=m" (*ptr)); } -static inline void _set_gate(void *adr, unsigned type, unsigned long func, - unsigned dpl, unsigned ist) +static inline void native_write_idt_entry(void *adr, struct gate_struct *s) { - struct gate_struct s; - - s.offset_low = PTR_LOW(func); - s.segment = __KERNEL_CS; - s.ist = ist; - s.p = 1; - s.dpl = dpl; - s.zero0 = 0; - s.zero1 = 0; - s.type = type; - s.offset_middle = PTR_MIDDLE(func); - s.offset_high = PTR_HIGH(func); - /* - * does not need to be atomic because it is only done once at - * setup time - */ - memcpy(adr, &s, 16); + /* does not need to be atomic because + * it is only done once at setup time */ + memcpy(adr, s, 16); } -static inline void set_intr_gate(int nr, void *func) +static inline void native_write_gdt_entry(void *ptr, void *entry, + unsigned type, unsigned size) { - BUG_ON((unsigned)nr > 0xFF); - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); -} - -static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) -{ - BUG_ON((unsigned)nr > 0xFF); - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); -} - -static inline void set_system_gate(int nr, void *func) -{ - BUG_ON((unsigned)nr > 0xFF); - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); + memcpy(ptr, entry, size); } -static inline void set_system_gate_ist(int nr, void *func, unsigned ist) -{ - _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned 
long) func, 3, ist); -} +/* + * This one unfortunately can't go with the others, in below, because it has + * an user anxious for its definition: set_tssldt_descriptor + */ +#ifndef CONFIG_PARAVIRT +#define write_gdt_entry(_ptr, _e, _type, _size) \ + native_write_gdt_entry((_ptr), (_e), (_type), (_size)) +#endif -static inline void load_idt(const struct desc_ptr *ptr) +static inline void native_load_idt(const struct desc_ptr *ptr) { asm volatile("lidt %w0"::"m" (*ptr)); } -static inline void store_idt(struct desc_ptr *dtr) +static inline void native_store_idt(struct desc_ptr *dtr) { asm("sidt %w0":"=m" (*dtr)); } @@ -129,7 +103,7 @@ static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, d.limit1 = (size >> 16) & 0xF; d.base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF; d.base3 = PTR_HIGH(tss); - memcpy(ptr, &d, 16); + write_gdt_entry(ptr, &d, type, 16); } static inline void set_tss_desc(unsigned cpu, void *addr) @@ -152,35 +126,7 @@ static inline void set_ldt_desc(unsigned cpu, void *addr, int size) DESC_LDT, size * 8 - 1); } -#define LDT_entry_a(info) \ - ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) -/* Don't allow setting of the lm bit. It is useless anyways because - 64bit system calls require __USER_CS. */ -#define LDT_entry_b(info) \ - (((info)->base_addr & 0xff000000) | \ - (((info)->base_addr & 0x00ff0000) >> 16) | \ - ((info)->limit & 0xf0000) | \ - (((info)->read_exec_only ^ 1) << 9) | \ - ((info)->contents << 10) | \ - (((info)->seg_not_present ^ 1) << 15) | \ - ((info)->seg_32bit << 22) | \ - ((info)->limit_in_pages << 23) | \ - ((info)->useable << 20) | \ - /* ((info)->lm << 21) | */ \ - 0x7000) - -#define LDT_empty(info) (\ - (info)->base_addr == 0 && \ - (info)->limit == 0 && \ - (info)->contents == 0 && \ - (info)->read_exec_only == 1 && \ - (info)->seg_32bit == 0 && \ - (info)->limit_in_pages == 0 && \ - (info)->seg_not_present == 1 && \ - (info)->useable == 0 && \ - (info)->lm == 0) - -static inline void load_TLS(struct thread_struct *t, unsigned int cpu) +static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) { unsigned int i; u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN); @@ -189,28 +135,77 @@ static inline void load_TLS(struct thread_struct *t, unsigned int cpu) gdt[i] = t->tls_array[i]; } -/* - * load one particular LDT into the current CPU - */ -static inline void load_LDT_nolock(mm_context_t *pc, int cpu) +static inline void native_set_ldt(const void *addr, + unsigned int entries) { - int count = pc->size; + if (likely(entries == 0)) + __asm__ __volatile__ ("lldt %w0" :: "r" (0)); + else { + unsigned cpu = smp_processor_id(); - if (likely(!count)) { - clear_LDT(); - return; + set_tssldt_descriptor(&cpu_gdt(cpu)[GDT_ENTRY_LDT], + (unsigned long)addr, DESC_LDT, + entries * 8 - 1); + __asm__ __volatile__ ("lldt %w0"::"r" (GDT_ENTRY_LDT*8)); } +} + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else +#define load_TR_desc() native_load_tr_desc() +#define load_gdt(ptr) native_load_gdt(ptr) +#define load_idt(ptr) native_load_idt(ptr) +#define load_TLS(t, cpu) native_load_tls(t, cpu) +#define set_ldt(addr, entries) native_set_ldt(addr, entries) +#define store_tr(tr) (tr) = native_store_tr() +#define store_gdt(ptr) native_store_gdt(ptr) +#define store_idt(ptr) native_store_idt(ptr) + +#define write_idt_entry(_adr, _s) native_write_idt_entry((_adr), (_s)) +#define write_ldt_entry(_ldt, _number, _entry1, _entry2) \ + write_dt_entry((_ldt), (_number), (_entry1), (_entry2)) +#endif + +static inline void 
_set_gate(void *adr, unsigned type, unsigned long func, + unsigned dpl, unsigned ist) +{ + struct gate_struct s; + + s.offset_low = PTR_LOW(func); + s.segment = __KERNEL_CS; + s.ist = ist; + s.p = 1; + s.dpl = dpl; + s.zero0 = 0; + s.zero1 = 0; + s.type = type; + s.offset_middle = PTR_MIDDLE(func); + s.offset_high = PTR_HIGH(func); + write_idt_entry(adr, &s); +} + +static inline void set_intr_gate(int nr, void *func) +{ + BUG_ON((unsigned)nr > 0xFF); + _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); +} - set_ldt_desc(cpu, pc->ldt, count); - load_LDT_desc(); +static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) +{ + BUG_ON((unsigned)nr > 0xFF); + _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); } -static inline void load_LDT(mm_context_t *pc) +static inline void set_system_gate(int nr, void *func) { - int cpu = get_cpu(); + BUG_ON((unsigned)nr > 0xFF); + _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); +} - load_LDT_nolock(pc, cpu); - put_cpu(); +static inline void set_system_gate_ist(int nr, void *func, unsigned ist) +{ + _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); } extern struct desc_ptr idt_descr; diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h index 29f95c3..85b7fb8 100644 --- a/include/asm-x86/mmu_context_64.h +++ b/include/asm-x86/mmu_context_64.h @@ -7,7 +7,16 @@ #include <asm/pda.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> + +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#else #include <asm-generic/mm_hooks.h> +static inline void paravirt_activate_mm(struct mm_struct *prev, + struct mm_struct *next) +{ +} +#endif /* CONFIG_PARAVIRT */ /* * possibly do the LDT unload here? @@ -25,7 +34,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline void load_cr3(pgd_t *pgd) { - asm volatile("movq %0,%%cr3" :: "r" (__pa(pgd)) : "memory"); + write_cr3(__pa(pgd)); } static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, @@ -43,7 +52,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, load_cr3(next->pgd); if (unlikely(next->context.ldt != prev->context.ldt)) - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } #ifdef CONFIG_SMP else { @@ -56,7 +65,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * to make sure to use no freed page tables. */ load_cr3(next->pgd); - load_LDT_nolock(&next->context, cpu); + load_LDT_nolock(&next->context); } } #endif @@ -67,8 +76,10 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, asm volatile("movl %0,%%fs"::"r"(0)); \ } while(0) -#define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL) - +#define activate_mm(prev, next) \ +do { \ + paravirt_activate_mm(prev, next); \ + switch_mm((prev), (next), NULL); \ +} while (0) #endif -- 1.4.4.2 |
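As with the page-table patch, the native_ descriptor functions above are what the !CONFIG_PARAVIRT macros collapse to; with CONFIG_PARAVIRT on, the same names are expected to come from paravirt.h and go through the cpu ops table. A rough sketch of that wiring (hook names mirror the i386 pvops and are assumptions here, not part of this patch):

struct pv_cpu_ops {
        void (*load_gdt)(const struct desc_ptr *ptr);
        void (*load_idt)(const struct desc_ptr *ptr);
        void (*set_ldt)(const void *addr, unsigned int entries);
        void (*load_tls)(struct thread_struct *t, unsigned int cpu);
        unsigned long (*store_tr)(void);
        /* ... store_gdt, store_idt, write_idt_entry, write_gdt_entry ... */
};
extern struct pv_cpu_ops pv_cpu_ops;

#define load_gdt(ptr)           pv_cpu_ops.load_gdt(ptr)
#define load_idt(ptr)           pv_cpu_ops.load_idt(ptr)
#define set_ldt(addr, entries)  pv_cpu_ops.set_ldt(addr, entries)
#define load_TLS(t, cpu)        pv_cpu_ops.load_tls(t, cpu)

/* while the native backend (e.g. in a paravirt_64.c) just fills the table
 * with the functions introduced by this patch */
struct pv_cpu_ops pv_cpu_ops = {
        .load_gdt = native_load_gdt,
        .load_idt = native_load_idt,
        .set_ldt  = native_set_ldt,
        .load_tls = native_load_tls,
};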
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:29:38
|
This patch makes vsmp a paravirt client. It now uses the whole infrastructure provided by pvops. When we detect we're running a vsmp box, we change the irq-related paravirt operations (and so, it have to happen quite early), and the patching function Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/Kconfig.x86_64 | 3 +- arch/x86/kernel/Makefile_64 | 3 +- arch/x86/kernel/setup_64.c | 2 + arch/x86/kernel/vsmp_64.c | 82 +++++++++++++++++++++++++++++++++++++----- include/asm-x86/setup.h | 6 ++- 5 files changed, 80 insertions(+), 16 deletions(-) diff --git a/arch/x86/Kconfig.x86_64 b/arch/x86/Kconfig.x86_64 index 568ba7a..fe4ef61 100644 --- a/arch/x86/Kconfig.x86_64 +++ b/arch/x86/Kconfig.x86_64 @@ -148,15 +148,14 @@ config X86_PC bool "PC-compatible" help Choose this option if your computer is a standard PC or compatible. - config X86_VSMP bool "Support for ScaleMP vSMP" depends on PCI + select PARAVIRT help Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is supposed to run on these EM64T-based machines. Only choose this option if you have one of these machines. - endchoice choice diff --git a/arch/x86/kernel/Makefile_64 b/arch/x86/kernel/Makefile_64 index 0714528..34e5c7d 100644 --- a/arch/x86/kernel/Makefile_64 +++ b/arch/x86/kernel/Makefile_64 @@ -8,7 +8,7 @@ obj-y := process_64.o signal_64.o entry_64.o traps_64.o irq_64.o \ x8664_ksyms_64.o i387_64.o syscall_64.o vsyscall_64.o \ setup64.o bootflag.o e820_64.o reboot_64.o quirks.o i8237.o \ pci-dma_64.o pci-nommu_64.o alternative.o hpet.o tsc_64.o bugs_64.o \ - i8253.o rtc.o + i8253.o rtc.o vsmp_64.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ @@ -29,7 +29,6 @@ obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o obj-$(CONFIG_KPROBES) += kprobes_64.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o -obj-$(CONFIG_X86_VSMP) += vsmp_64.o obj-$(CONFIG_K8_NB) += k8.o obj-$(CONFIG_AUDIT) += audit_64.o diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 1c9f237..8cc5915 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -338,6 +338,8 @@ void __init setup_arch(char **cmdline_p) init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); + vsmp_init(); + dmi_scan_machine(); #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index d971210..24b0541 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -8,31 +8,95 @@ * * Ravikiran Thirumalai <ki...@sc...>, * Shai Fultheim <sh...@sc...> + * Paravirt ops integration: Glauber de Oliveira Costa <gc...@re...> */ - #include <linux/init.h> #include <linux/pci_ids.h> #include <linux/pci_regs.h> #include <asm/pci-direct.h> #include <asm/io.h> +#include <asm/paravirt.h> + +/* + * Interrupt control for the VSMP architecture: + */ + +static inline unsigned long vsmp_save_fl(void) +{ + unsigned long flags = native_save_fl(); + + if (!(flags & X86_EFLAGS_AC)) + return X86_EFLAGS_IF; + return 0; +} + +static inline void vsmp_restore_fl(unsigned long flags) +{ + if (flags & X86_EFLAGS_IF) + flags &= ~X86_EFLAGS_AC; + if (!(flags & X86_EFLAGS_IF)) + flags &= X86_EFLAGS_AC; + native_restore_fl(flags); +} + +static inline void vsmp_irq_disable(void) +{ + unsigned long flags = native_save_fl(); -static int __init vsmp_init(void) + vsmp_restore_fl(flags & ~X86_EFLAGS_IF); +} + +static inline void vsmp_irq_enable(void) +{ + unsigned long flags = 
native_save_fl(); + + vsmp_restore_fl(flags | X86_EFLAGS_IF); +} + +#ifdef CONFIG_PARAVIRT +static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, + unsigned long addr, unsigned len) +{ + switch (type) { + case PARAVIRT_PATCH(pv_irq_ops.irq_enable): + case PARAVIRT_PATCH(pv_irq_ops.irq_disable): + case PARAVIRT_PATCH(pv_irq_ops.save_fl): + case PARAVIRT_PATCH(pv_irq_ops.restore_fl): + return paravirt_patch_default(type, clobbers, ibuf, addr, len); + default: + return native_patch(type, clobbers, ibuf, addr, len); + } + +} +#endif + +void __init vsmp_init(void) { void *address; - unsigned int cap, ctl; + unsigned int cap, ctl, cfg; if (!early_pci_allowed()) - return 0; + return; /* Check if we are running on a ScaleMP vSMP box */ if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) || (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) - return 0; + return; + +#ifdef CONFIG_PARAVIRT + /* If we are, use the distinguished irq functions */ + pv_irq_ops.irq_disable = vsmp_irq_disable; + pv_irq_ops.irq_enable = vsmp_irq_enable; + pv_irq_ops.save_fl = vsmp_save_fl; + pv_irq_ops.restore_fl = vsmp_restore_fl; + pv_init_ops.patch = vsmp_patch; +#endif /* set vSMP magic bits to indicate vSMP capable kernel */ - address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8); + cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0); + address = early_ioremap(cfg, 8); cap = readl(address); ctl = readl(address + 4); printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n", @@ -45,8 +109,6 @@ static int __init vsmp_init(void) printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl); } - iounmap(address); - return 0; + early_iounmap(address, 8); + return; } - -core_initcall(vsmp_init); diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index 071e054..8f532fa 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -4,6 +4,10 @@ #define COMMAND_LINE_SIZE 2048 #ifndef __ASSEMBLY__ + +/* For EM64T-based VSMP machines */ +void vsmp_init(void); + char *machine_specific_memory_setup(void); #ifndef CONFIG_PARAVIRT #define paravirt_post_allocator_init() do {} while (0) @@ -58,8 +62,6 @@ void __init add_memory_region(unsigned long long start, extern unsigned long init_pg_tables_end; - - #endif /* __i386__ */ #endif /* _SETUP */ #endif /* __ASSEMBLY__ */ -- 1.4.4.2 |
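For context on what the new irq ops encode: on vSMP the AC flag takes part in interrupt state alongside IF, so save_fl/restore_fl have to translate between the kernel's IF-only view and the IF/AC pair. A small illustration of the intended mapping (a sketch mirroring the names above, not the posted code; note that for the encoding to round-trip, the disable path needs to set AC, i.e. an OR rather than the AND-mask in the hunk above):

/*
 * vSMP convention assumed here:
 *
 *     kernel view              hardware flags
 *     interrupts enabled  ->   IF set, AC clear
 *     interrupts disabled ->   AC set
 */
static unsigned long sketch_save_fl(void)
{
        unsigned long flags = native_save_fl();

        /* report "enabled" only while AC is clear */
        if (!(flags & X86_EFLAGS_AC))
                return X86_EFLAGS_IF;
        return 0;
}

static void sketch_restore_fl(unsigned long flags)
{
        if (flags & X86_EFLAGS_IF)
                flags &= ~X86_EFLAGS_AC;        /* enable: clear AC */
        else
                flags |= X86_EFLAGS_AC;         /* disable: set AC */
        native_restore_fl(flags);
}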
From: Andi K. <ak...@su...> - 2007-11-11 18:07:13
|
On Fri, Nov 09, 2007 at 04:43:05PM -0200, Glauber de Oliveira Costa wrote: > This patch makes vsmp a paravirt client. It now uses the whole > infrastructure provided by pvops. When we detect we're running > a vsmp box, we change the irq-related paravirt operations (and so, > it have to happen quite early), and the patching function The PARAVIRT ifdefs look wrong. Surely you don't need them at all, because it cannot work without paravirt. Also, you got some whitespace damage. And the "EM64T-based" comment is wrong because there are AMD-based vSMPs too. -Andi |
From: Andi K. <ak...@su...> - 2007-11-13 12:33:57
|
> The vsmp_64.c file is now compiled unconditionally, according to what > Kiran and I agreed to. The detection code is always run, but will only > trigger when a suitable box is found. Accordingly, the paravirt structs > are only touched when PARAVIRT is on. Otherwise, we don't even have the > symbols. That seems dumb. What good is it if it doesn't patch the interrupt code? -Andi |
From: Andi K. <ak...@su...> - 2007-11-13 13:09:46
|
> If vsmp is selected, PARAVIRT will be too, and the interrupt code will > be patched. > the vsmp option triggers a select statement. > > the ifdef only exists because, as I said, the code itself will be always > compiled in, to avoid an ifdef in setup_64.c. So it's just a taking it > from here, putting it there issue. Peeking around needlessly in pci config space at each boot just to avoid an ifdef is not a good idea. -Andi |
From: Ravikiran T. <ki...@sc...> - 2007-11-13 18:50:03
|
On Tue, Nov 13, 2007 at 02:09:41PM +0100, Andi Kleen wrote: > >> If vsmp is selected, PARAVIRT will be too, and the interrupt code will >> be patched. >> the vsmp option triggers a select statement. >> >> the ifdef only exists because, as I said, the code itself will be always >> compiled in, to avoid an ifdef in setup_64.c. So it's just a taking it >> from here, putting it there issue. > >Peeking around needlessly in pci config space at each boot just to avoid an >ifdef is not a good idea. Agreed. vsmp_init does not make sense unless the irq ops are patched. We can call vsmp_init under #ifdef CONFIG_PARAVIRT. Thanks, Kiran |
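For completeness, the guarded call being proposed is a one-liner either way; a sketch of the two obvious spellings (hypothetical, not a posted patch):

/* in setup_arch(), guard the probe itself ... */
#ifdef CONFIG_PARAVIRT
        vsmp_init();
#endif

/* ... or keep setup_64.c clean and stub the declaration out in setup.h */
#ifdef CONFIG_PARAVIRT
void vsmp_init(void);
#else
static inline void vsmp_init(void) { }
#endif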
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:29:08
|
Under paravirt, the cr2 read cannot be issued directly anymore, so wrap it in a macro: it expands to the plain instruction when paravirt is off, and to the paravirt-provided version otherwise. Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/kernel/head_64.S | 9 ++++++++- 1 files changed, 8 insertions(+), 1 deletions(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b6167fe..c31b1c9 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -19,6 +19,13 @@ #include <asm/msr.h> #include <asm/cache.h> +#ifdef CONFIG_PARAVIRT +#include <asm/asm-offsets.h> +#include <asm/paravirt.h> +#else +#define GET_CR2_INTO_RCX movq %cr2, %rcx +#endif + /* we are not able to switch in one step to the final KERNEL ADRESS SPACE * because we need identity-mapped pages. * @@ -267,7 +274,7 @@ ENTRY(early_idt_handler) xorl %eax,%eax movq 8(%rsp),%rsi # get rip movq (%rsp),%rdx - movq %cr2,%rcx + GET_CR2_INTO_RCX leaq early_idt_msg(%rip),%rdi call early_printk cmpl $2,early_recursion_flag(%rip) -- 1.4.4.2 |
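The CONFIG_PARAVIRT branch above pulls GET_CR2_INTO_RCX in from paravirt.h rather than defining it; that definition is not part of this patch, but it is expected to look roughly like the sketch below, with PV_MMU_read_cr2 assumed to be an asm-offsets constant for the read_cr2 hook:

/* paravirt.h side (sketch): call the read_cr2 hook, move the result into
 * %rcx, and re-zero %rax, which the early_printk call site relies on */
#define GET_CR2_INTO_RCX                        \
        call *pv_mmu_ops+PV_MMU_read_cr2;       \
        movq %rax, %rcx;                        \
        xorq %rax, %rax;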
From: Glauber de O. C. <gc...@re...> - 2007-11-09 21:29:23
|
We need something here because we can't call in and out instructions directly. However, we have to be careful, because no indirections are allowed in misc_64.c , and paravirt_ops is a kind of one. So just call it directly there Signed-off-by: Glauber de Oliveira Costa <gc...@re...> Signed-off-by: Steven Rostedt <ro...@go...> Acked-by: Jeremy Fitzhardinge <je...@xe...> --- arch/x86/boot/compressed/misc_64.c | 6 +++++ include/asm-x86/io_64.h | 37 +++++++++++++++++++++++++++++------ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/arch/x86/boot/compressed/misc_64.c b/arch/x86/boot/compressed/misc_64.c index 6ea015a..6640a17 100644 --- a/arch/x86/boot/compressed/misc_64.c +++ b/arch/x86/boot/compressed/misc_64.c @@ -9,6 +9,12 @@ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 */ +/* + * we have to be careful, because no indirections are allowed here, and + * paravirt_ops is a kind of one. As it will only run in baremetal anyway, + * we just keep it from happening + */ +#undef CONFIG_PARAVIRT #define _LINUX_STRING_H_ 1 #define __LINUX_BITMAP_H 1 diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index a037b07..57fcdd9 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -35,12 +35,24 @@ * - Arnaldo Carvalho de Melo <ac...@co...> */ -#define __SLOW_DOWN_IO "\noutb %%al,$0x80" +static inline void native_io_delay(void) +{ + asm volatile("outb %%al,$0x80" : : : "memory"); +} -#ifdef REALLY_SLOW_IO -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO +#if defined(CONFIG_PARAVIRT) +#include <asm/paravirt.h> #else -#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO + +static inline void slow_down_io(void) +{ + native_io_delay(); +#ifdef REALLY_SLOW_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); +#endif +} #endif /* @@ -52,9 +64,15 @@ static inline void out##s(unsigned x value, unsigned short port) { #define __OUT2(s,s1,s2) \ __asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1" +#ifndef REALLY_SLOW_IO +#define REALLY_SLOW_IO +#define UNSET_REALLY_SLOW_IO +#endif + #define __OUT(s,s1,x) \ __OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \ -__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \ +__OUT1(s##_p, x) __OUT2(s, s1, "w") : : "a" (value), "Nd" (port)); \ + slow_down_io(); } #define __IN1(s) \ static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; @@ -63,8 +81,13 @@ static inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v; __asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0" #define __IN(s,s1,i...) \ -__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ -__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \ +__IN1(s) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); return _v; } \ +__IN1(s##_p) __IN2(s, s1, "w") : "=a" (_v) : "Nd" (port), ##i); \ + slow_down_io(); return _v; } + +#ifdef UNSET_REALLY_SLOW_IO +#undef REALLY_SLOW_IO +#endif #define __INS(s) \ static inline void ins##s(unsigned short port, void * addr, unsigned long count) \ -- 1.4.4.2 |
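Since the macro changes are easy to misread: after this patch the _p ("pausing") variants end in a plain C call to slow_down_io() instead of having the port 0x80 write baked into the asm string, and that call is exactly what a paravirt slow_down_io() from paravirt.h can interpose on when CONFIG_PARAVIRT is set. Hand-expanded for the byte case (illustration only, not literal preprocessor output):

static inline void outb(unsigned char value, unsigned short port)
{
        __asm__ __volatile__("outb %b0,%w1" : : "a" (value), "Nd" (port));
}

static inline void outb_p(unsigned char value, unsigned short port)
{
        __asm__ __volatile__("outb %b0,%w1" : : "a" (value), "Nd" (port));
        slow_down_io();         /* native_io_delay() or a pvops hook */
}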