From: Andy P. <at...@us...> - 2002-04-10 18:37:40
|
Update of /cvsroot/linux-vax/kernel-2.4/arch/i386/mm In directory usw-pr-cvs1:/tmp/cvs-serv10929/i386/mm Modified Files: extable.c fault.c init.c ioremap.c Log Message: synch 2.4.15 commit 35 Index: extable.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/i386/mm/extable.c,v retrieving revision 1.1.1.2 retrieving revision 1.2 diff -u -r1.1.1.2 -r1.2 --- extable.c 25 Feb 2001 23:15:16 -0000 1.1.1.2 +++ extable.c 10 Apr 2002 14:23:22 -0000 1.2 @@ -37,19 +37,19 @@ search_exception_table(unsigned long addr) { unsigned long ret = 0; - unsigned long flags; #ifndef CONFIG_MODULES /* There is only the kernel to search. */ ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr); return ret; #else + unsigned long flags; /* The kernel is the last "module" -- no need to treat it special. */ struct module *mp; spin_lock_irqsave(&modlist_lock, flags); for (mp = module_list; mp != NULL; mp = mp->next) { - if (mp->ex_table_start == NULL) + if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING))) continue; ret = search_one_table(mp->ex_table_start, mp->ex_table_end - 1, addr); Index: fault.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/i386/mm/fault.c,v retrieving revision 1.1.1.2 retrieving revision 1.2 diff -u -r1.1.1.2 -r1.2 --- fault.c 25 Feb 2001 23:15:16 -0000 1.1.1.2 +++ fault.c 10 Apr 2002 14:23:22 -0000 1.2 @@ -17,6 +17,8 @@ #include <linux/smp_lock.h> #include <linux/interrupt.h> #include <linux/init.h> +#include <linux/tty.h> +#include <linux/vt_kern.h> /* For unblank_screen() */ #include <asm/system.h> #include <asm/uaccess.h> @@ -25,6 +27,8 @@ extern void die(const char *,struct pt_regs *,long); +extern int console_loglevel; + /* * Ugly, ugly, but the goto's result in better assembly.. */ @@ -51,8 +55,14 @@ start &= PAGE_MASK; for (;;) { - if (handle_mm_fault(current->mm, vma, start, 1) <= 0) - goto bad_area; + survive: + { + int fault = handle_mm_fault(current->mm, vma, start, 1); + if (!fault) + goto bad_area; + if (fault < 0) + goto out_of_memory; + } if (!size) break; size--; @@ -75,19 +85,52 @@ bad_area: return 0; + +out_of_memory: + if (current->pid == 1) { + current->policy |= SCHED_YIELD; + schedule(); + goto survive; + } + goto bad_area; } -extern spinlock_t console_lock, timerlist_lock; +extern spinlock_t timerlist_lock; /* * Unlock any spinlocks which will prevent us from getting the * message out (timerlist_lock is acquired through the * console unblank code) */ -void bust_spinlocks(void) +void bust_spinlocks(int yes) { - spin_lock_init(&console_lock); spin_lock_init(&timerlist_lock); + if (yes) { + oops_in_progress = 1; +#ifdef CONFIG_SMP + global_irq_lock = 0; /* Many serial drivers do __global_cli() */ +#endif + } else { + int loglevel_save = console_loglevel; +#ifdef CONFIG_VT + unblank_screen(); +#endif + oops_in_progress = 0; + /* + * OK, the message is on the console. Now we call printk() + * without oops_in_progress set so that printk will give klogd + * a poke. Hold onto your hats... + */ + console_loglevel = 15; /* NMI oopser may have shut the console up */ + printk(" "); + console_loglevel = loglevel_save; + } +} + +void do_BUG(const char *file, int line) +{ + bust_spinlocks(1); + printk("kernel BUG at %s:%d!\n", file, line); } asmlinkage void do_invalid_op(struct pt_regs *, unsigned long); @@ -117,6 +160,10 @@ /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); + /* It's safe to allow irq's after cr2 has been saved */ + if (regs->eflags & X86_EFLAGS_IF) + local_irq_enable(); + tsk = current; /* @@ -127,8 +174,12 @@ * be in an interrupt or a critical region, and should * only copy the information from the master page table, * nothing more. + * + * This verifies that the fault happens in kernel space + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 1) == 0. */ - if (address >= TASK_SIZE) + if (address >= TASK_SIZE && !(error_code & 5)) goto vmalloc_fault; mm = tsk->mm; @@ -141,7 +192,7 @@ if (in_interrupt() || !mm) goto no_context; - down(&mm->mmap_sem); + down_read(&mm->mmap_sem); vma = find_vma(mm, address); if (!vma) @@ -188,6 +239,7 @@ goto bad_area; } + survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -214,7 +266,7 @@ if (bit < 32) tsk->thread.screen_bitmap |= 1 << bit; } - up(&mm->mmap_sem); + up_read(&mm->mmap_sem); return; /* @@ -222,9 +274,8 @@ * Fix it, but check if it's kernel or user first.. */ bad_area: - up(&mm->mmap_sem); + up_read(&mm->mmap_sem); -bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { tsk->thread.cr2 = address; @@ -264,7 +315,7 @@ * terminate things with extreme prejudice. */ - bust_spinlocks(); + bust_spinlocks(1); if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); @@ -283,6 +334,7 @@ printk(KERN_ALERT "*pte = %08lx\n", page); } die("Oops", regs, error_code); + bust_spinlocks(0); do_exit(SIGKILL); /* @@ -290,14 +342,20 @@ * us unable to handle the page fault gracefully. */ out_of_memory: - up(&mm->mmap_sem); + up_read(&mm->mmap_sem); + if (tsk->pid == 1) { + tsk->policy |= SCHED_YIELD; + schedule(); + down_read(&mm->mmap_sem); + goto survive; + } printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) do_exit(SIGKILL); goto no_context; do_sigbus: - up(&mm->mmap_sem); + up_read(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel @@ -306,7 +364,7 @@ tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; - info.si_code = SIGBUS; + info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; info.si_addr = (void *)address; @@ -322,27 +380,32 @@ /* * Synchronize this task's top level page-table * with the 'reference' page table. + * + * Do _not_ use "tsk" here. We might be inside + * an interrupt in the middle of a task switch.. */ int offset = __pgd_offset(address); pgd_t *pgd, *pgd_k; pmd_t *pmd, *pmd_k; + pte_t *pte_k; - pgd = tsk->active_mm->pgd + offset; + asm("movl %%cr3,%0":"=r" (pgd)); + pgd = offset + (pgd_t *)__va(pgd); pgd_k = init_mm.pgd + offset; - if (!pgd_present(*pgd)) { - if (!pgd_present(*pgd_k)) - goto bad_area_nosemaphore; - set_pgd(pgd, *pgd_k); - return; - } - + if (!pgd_present(*pgd_k)) + goto no_context; + set_pgd(pgd, *pgd_k); + pmd = pmd_offset(pgd, address); pmd_k = pmd_offset(pgd_k, address); - - if (pmd_present(*pmd) || !pmd_present(*pmd_k)) - goto bad_area_nosemaphore; + if (!pmd_present(*pmd_k)) + goto no_context; set_pmd(pmd, *pmd_k); + + pte_k = pte_offset(pmd_k, address); + if (!pte_present(*pte_k)) + goto no_context; return; } } Index: init.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/i386/mm/init.c,v retrieving revision 1.1.1.2 retrieving revision 1.2 diff -u -r1.1.1.2 -r1.2 --- init.c 25 Feb 2001 23:15:16 -0000 1.1.1.2 +++ init.c 10 Apr 2002 14:23:22 -0000 1.2 @@ -35,137 +35,30 @@ #include <asm/fixmap.h> #include <asm/e820.h> #include <asm/apic.h> +#include <asm/tlb.h> +mmu_gather_t mmu_gathers[NR_CPUS]; unsigned long highstart_pfn, highend_pfn; static unsigned long totalram_pages; static unsigned long totalhigh_pages; -/* - * BAD_PAGE is the page that is used for page faults when linux - * is out-of-memory. Older versions of linux just did a - * do_exit(), but using this instead means there is less risk - * for a process dying in kernel mode, possibly leaving an inode - * unused etc.. - * - * BAD_PAGETABLE is the accompanying page-table: it is initialized - * to point to BAD_PAGE entries. - * - * ZERO_PAGE is a special page that is used for zero-initialized - * data and COW. - */ - -/* - * These are allocated in head.S so that we get proper page alignment. - * If you change the size of these then change head.S as well. - */ -extern char empty_bad_page[PAGE_SIZE]; -#if CONFIG_X86_PAE -extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD]; -#endif -extern pte_t empty_bad_pte_table[PTRS_PER_PTE]; - -/* - * We init them before every return and make them writable-shared. - * This guarantees we get out of the kernel in some more or less sane - * way. - */ -#if CONFIG_X86_PAE -static pmd_t * get_bad_pmd_table(void) -{ - pmd_t v; - int i; - - set_pmd(&v, __pmd(_PAGE_TABLE + __pa(empty_bad_pte_table))); - - for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++) - empty_bad_pmd_table[i] = v; - - return empty_bad_pmd_table; -} -#endif - -static pte_t * get_bad_pte_table(void) -{ - pte_t v; - int i; - - v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED)); - - for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++) - empty_bad_pte_table[i] = v; - - return empty_bad_pte_table; -} - - - -void __handle_bad_pmd(pmd_t *pmd) -{ - pmd_ERROR(*pmd); - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table()))); -} - -void __handle_bad_pmd_kernel(pmd_t *pmd) -{ - pmd_ERROR(*pmd); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table()))); -} - -pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset) -{ - pte_t *pte; - - pte = (pte_t *) __get_free_page(GFP_KERNEL); - if (pmd_none(*pmd)) { - if (pte) { - clear_page(pte); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); - return pte + offset; - } - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(get_bad_pte_table()))); - return NULL; - } - free_page((unsigned long)pte); - if (pmd_bad(*pmd)) { - __handle_bad_pmd_kernel(pmd); - return NULL; - } - return (pte_t *) pmd_page(*pmd) + offset; -} - -pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset) -{ - unsigned long pte; - - pte = (unsigned long) __get_free_page(GFP_KERNEL); - if (pmd_none(*pmd)) { - if (pte) { - clear_page((void *)pte); - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); - return (pte_t *)pte + offset; - } - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(get_bad_pte_table()))); - return NULL; - } - free_page(pte); - if (pmd_bad(*pmd)) { - __handle_bad_pmd(pmd); - return NULL; - } - return (pte_t *) pmd_page(*pmd) + offset; -} - int do_check_pgt_cache(int low, int high) { int freed = 0; if(pgtable_cache_size > high) { do { - if(pgd_quicklist) - free_pgd_slow(get_pgd_fast()), freed++; - if(pmd_quicklist) - free_pmd_slow(get_pmd_fast()), freed++; - if(pte_quicklist) - free_pte_slow(get_pte_fast()), freed++; + if (pgd_quicklist) { + free_pgd_slow(get_pgd_fast()); + freed++; + } + if (pmd_quicklist) { + pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); + freed++; + } + if (pte_quicklist) { + pte_free_slow(pte_alloc_one_fast(NULL, 0)); + freed++; + } } while(pgtable_cache_size > low); } return freed; @@ -327,10 +220,8 @@ pgd_base = swapper_pg_dir; #if CONFIG_X86_PAE - for (i = 0; i < PTRS_PER_PGD; i++) { - pgd = pgd_base + i; - __pgd_clear(pgd); - } + for (i = 0; i < PTRS_PER_PGD; i++) + set_pgd(pgd_base + i, __pgd(1 + __pa(empty_zero_page))); #endif i = __pgd_offset(PAGE_OFFSET); pgd = pgd_base + i; @@ -420,14 +311,11 @@ * Zap initial low-memory mappings. * * Note that "pgd_clear()" doesn't do it for - * us in this case, because pgd_clear() is a - * no-op in the 2-level case (pmd_clear() is - * the thing that clears the page-tables in - * that case). + * us, because pgd_clear() is a no-op on i386. */ for (i = 0; i < USER_PTRS_PER_PGD; i++) #if CONFIG_X86_PAE - pgd_clear(swapper_pg_dir+i); + set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page))); #else set_pgd(swapper_pg_dir+i, __pgd(0)); #endif @@ -551,13 +439,24 @@ return 0; } +static inline int page_kills_ppro(unsigned long pagenr) +{ + if(pagenr >= 0x70000 && pagenr <= 0x7003F) + return 1; + return 0; +} + void __init mem_init(void) { + extern int ppro_with_ram_bug(void); int codesize, reservedpages, datasize, initsize; int tmp; + int bad_ppro; if (!mem_map) BUG(); + + bad_ppro = ppro_with_ram_bug(); #ifdef CONFIG_HIGHMEM highmem_start_page = mem_map + highstart_pfn; @@ -585,6 +484,11 @@ struct page *page = mem_map + tmp; if (!page_is_ram(tmp)) { + SetPageReserved(page); + continue; + } + if (bad_ppro && page_kills_ppro(tmp)) + { SetPageReserved(page); continue; } Index: ioremap.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/i386/mm/ioremap.c,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- ioremap.c 14 Jan 2001 19:20:04 -0000 1.1.1.1 +++ ioremap.c 10 Apr 2002 14:23:22 -0000 1.2 @@ -49,7 +49,7 @@ if (address >= end) BUG(); do { - pte_t * pte = pte_alloc_kernel(pmd, address); + pte_t * pte = pte_alloc(&init_mm, pmd, address); if (!pte) return -ENOMEM; remap_area_pte(pte, address, end - address, address + phys_addr, flags); @@ -62,6 +62,7 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, unsigned long size, unsigned long flags) { + int error; pgd_t * dir; unsigned long end = address + size; @@ -70,19 +71,23 @@ flush_cache_all(); if (address >= end) BUG(); + spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc_kernel(dir, address); + pmd = pmd_alloc(&init_mm, dir, address); + error = -ENOMEM; if (!pmd) - return -ENOMEM; + break; if (remap_area_pmd(pmd, address, end - address, phys_addr + address, flags)) - return -ENOMEM; + break; + error = 0; address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); + spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); - return 0; + return error; } /* |