From: James S. <jsi...@us...> - 2002-04-24 18:56:59
|
Update of /cvsroot/linuxconsole/ruby/linux/arch/x86_64/mm In directory usw-pr-cvs1:/tmp/cvs-serv8328/linux/arch/x86_64/mm Modified Files: fault.c Log Message: Synced to 2.5.9. Note some of the fb changes went into 2.5.9. Yeah :-) Index: fault.c =================================================================== RCS file: /cvsroot/linuxconsole/ruby/linux/arch/x86_64/mm/fault.c,v retrieving revision 1.2 retrieving revision 1.3 diff -u -d -r1.2 -r1.3 --- fault.c 18 Mar 2002 19:29:09 -0000 1.2 +++ fault.c 24 Apr 2002 18:56:51 -0000 1.3 @@ -2,8 +2,10 @@ * linux/arch/x86-64/mm/fault.c * * Copyright (C) 1995 Linus Torvalds + * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. */ +#include <linux/config.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -18,17 +20,17 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/tty.h> +#include <linux/compiler.h> #include <asm/system.h> #include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/hardirq.h> +#include <asm/smp.h> +#include <asm/tlbflush.h> extern void die(const char *,struct pt_regs *,long); -asmlinkage void do_invalid_op(struct pt_regs *, unsigned long); -extern unsigned long idt; - extern spinlock_t console_lock, timerlist_lock; void bust_spinlocks(int yes) @@ -40,7 +42,8 @@ global_irq_lock = 0; /* Many serial drivers do __global_cli() */ #endif } else { - int loglevel_save = console_loglevel; + int loglevel_save = console_loglevel; + oops_in_progress = 0; /* * OK, the message is on the console. Now we call printk() @@ -53,33 +56,34 @@ } } -void do_BUG(const char *file, int line) -{ - bust_spinlocks(1); - printk("kernel BUG at %s:%d!\n", file, line); -} - - void dump_pagetable(unsigned long address) { static char *name[] = { "PML4", "PGD", "PDE", "PTE" }; int i, shift; unsigned long page; - asm("movq %%cr3,%0":"=r" (page)); shift = 9+9+9+12; address &= ~0xFFFF000000000000UL; + asm("movq %%cr3,%0" : "=r" (page)); for (i = 0; i < 4; i++) { - page = ((unsigned long *) __va(page))[(address >> shift) & 0x1FFU]; + unsigned long *padr = (unsigned long *) __va(page); + padr += (address >> shift) & 0x1FFU; + if (__get_user(page, padr)) { + printk("%s: bad %p\n", name[i], padr); + break; + } printk("%s: %016lx ", name[i], page); if ((page & (1 | (1<<7))) != 1) /* Not present or 2MB page */ break; page &= ~0xFFFUL; - shift -= 9; + shift -= (i == 0) ? 12 : 9; } printk("\n"); } +int page_fault_trace; +int exception_trace = 1; + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -89,6 +93,7 @@ * bit 0 == 0 means no page found, 1 means protection fault * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode + * bit 3 == 1 means fault was an instruction fetch */ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) { @@ -103,15 +108,20 @@ /* get the address */ __asm__("movq %%cr2,%0":"=r" (address)); + if (page_fault_trace) + printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", + regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); tsk = current; mm = tsk->mm; info.si_code = SEGV_MAPERR; - if (address >= TASK_SIZE && !(error_code & 5)) + /* 5 => page not present and from supervisor mode */ + if (unlikely(!(error_code & 5) && + ((address >= VMALLOC_START && address <= VMALLOC_END) || + (address >= MODULES_VADDR && address <= MODULES_END)))) goto vmalloc_fault; - /* * If we're in an interrupt or have no user * context, we must not take the fault.. @@ -119,19 +129,10 @@ if (in_interrupt() || !mm) goto no_context; + again: down_read(&mm->mmap_sem); vma = find_vma(mm, address); - -#if 0 - printk("fault at %lx rip:%lx rsp:%lx err:%lx thr:%x ", address,regs->rip,regs->rsp,error_code,tsk->thread.flags); - if (vma) - printk("vma %lx-%lx prot:%lx flags:%lx",vma->vm_start,vma->vm_end, - vma->vm_page_prot,vma->vm_flags); - printk("\n"); -#endif - - if (!vma) goto bad_area; if (vma->vm_start <= address) @@ -167,7 +168,6 @@ goto bad_area; } -survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -200,10 +200,9 @@ /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { - - printk(KERN_ERR "%.20s[%d] segfaulted rip:%lx rsp:%lx adr:%lx err:%lx\n", - tsk->comm, tsk->pid, - regs->rip, regs->rsp, address, error_code); + printk("%s[%d] segfault at rip:%lx rsp:%lx adr:%lx err:%lx\n", + tsk->comm, tsk->pid, regs->rip, regs->rsp, address, + error_code); tsk->thread.cr2 = address; tsk->thread.error_code = error_code; @@ -221,6 +220,10 @@ /* Are we prepared to handle this kernel fault? */ if ((fixup = search_exception_table(regs->rip)) != 0) { regs->rip = fixup; + if (exception_trace) + printk(KERN_ERR + "%s: fixed kernel exception at %lx address %lx err:%ld\n", + current->comm, regs->rip, address, error_code); return; } @@ -251,8 +254,7 @@ up_read(&mm->mmap_sem); if (current->pid == 1) { yield(); - down_read(&mm->mmap_sem); - goto survive; + goto again; } printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) @@ -278,43 +280,39 @@ /* Kernel mode? Handle exceptions or die */ if (!(error_code & 4)) goto no_context; + return; vmalloc_fault: { + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + /* - * Synchronize the kernel space top level page-table - * with the 'reference' page table. - * Currently it only works for first and last 512 GB of - * kernel memory FIXME - * + * x86-64 has the same kernel 3rd level pages for all CPUs. + * But for vmalloc/modules the TLB synchronization works lazily, + * so it can happen that we get a page fault for something + * that is really already in the page table. Just check if it + * is really there and when yes flush the local TLB. */ - level4_t *l4pd = level4_offset_k(address); - int offset = __pgd_offset(address); - pgd_t *pgd, *pgd_k; - pmd_t *pmd, *pmd_k; - - if (! level4_val(*l4pd)) { - printk(KERN_ERR "fatal - no entry in level4_page for %lx\n", - address); - goto bad_area_nosemaphore; - } - pgd = level3_offset_k(l4pd, address); - pgd_k = init_mm.pgd + offset; - - if (!pgd_present(*pgd)) { - if (!pgd_present(*pgd_k)) + pgd = pgd_offset_k(address); + if (pgd != current_pgd_offset_k(address)) + BUG(); + if (!pgd_present(*pgd)) goto bad_area_nosemaphore; - set_pgd(pgd, *pgd_k); - return; - } - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); - - if (pmd_present(*pmd) || !pmd_present(*pmd_k)) + if (!pmd_present(*pmd)) goto bad_area_nosemaphore; - set_pmd(pmd, *pmd_k); + pte = pte_offset_kernel(pmd, address); + if (!pte_present(*pte)) + goto bad_area_nosemaphore; + + /* Strictly a flush_tlb_all because vmalloc is global, + but this only applies to new global pages so it should + not be needed. vmalloc will likely touch multiple ptes, + so do a full flush instead of a partial one. */ + __flush_tlb(); return; } } |