From: Andy P. <at...@us...> - 2002-04-09 17:08:20
Update of /cvsroot/linux-vax/kernel-2.4/arch/s390x/mm
In directory usw-pr-cvs1:/tmp/cvs-serv13825/s390x/mm
Modified Files:
extable.c fault.c init.c ioremap.c
Log Message:
synch 2.4.15 commit 29
Index: extable.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/s390x/mm/extable.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- extable.c 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ extable.c 9 Apr 2002 17:03:18 -0000 1.2
@@ -10,6 +10,7 @@
#include <linux/config.h>
#include <linux/module.h>
+#include <linux/spinlock.h>
#include <asm/uaccess.h>
extern const struct exception_table_entry __start___ex_table[];
@@ -36,26 +37,32 @@
return 0;
}
+extern spinlock_t modlist_lock;
+
unsigned long
search_exception_table(unsigned long addr)
{
- unsigned long ret;
+ unsigned long ret = 0;
+ unsigned long flags;
#ifndef CONFIG_MODULES
/* There is only the kernel to search. */
ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr);
- if (ret) return FIX_PSW(ret);
+ return ret;
#else
/* The kernel is the last "module" -- no need to treat it special. */
struct module *mp;
+
+ spin_lock_irqsave(&modlist_lock, flags);
for (mp = module_list; mp != NULL; mp = mp->next) {
- if (mp->ex_table_start == NULL)
+ if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING)))
continue;
ret = search_one_table(mp->ex_table_start,
mp->ex_table_end - 1, addr);
- if (ret) return FIX_PSW(ret);
+ if (ret)
+ break;
}
+ spin_unlock_irqrestore(&modlist_lock, flags);
+ return ret;
#endif
-
- return 0;
}
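
For reference, the search_one_table() helper whose closing lines open the hunk above is untouched by this commit; in the 2.4 tree it is a plain binary search over the sorted __ex_table section. A sketch of that shape, following the generic 2.4 convention (field names vary slightly per architecture, so treat this as illustrative):

    struct exception_table_entry {
            unsigned long insn;     /* address of the faulting instruction */
            unsigned long fixup;    /* address to resume execution at */
    };

    static unsigned long
    search_one_table(const struct exception_table_entry *first,
                     const struct exception_table_entry *last,
                     unsigned long value)
    {
            while (first <= last) {
                    const struct exception_table_entry *mid =
                            first + (last - first) / 2;
                    long diff = mid->insn - value;

                    if (diff == 0)
                            return mid->fixup;  /* fixup found for this fault */
                    if (diff < 0)
                            first = mid + 1;
                    else
                            last = mid - 1;
            }
            return 0;       /* no entry: the fault is a genuine kernel bug */
    }

The point of the new modlist_lock/spin_lock_irqsave() pairing is that the module list can change while the walk is in progress; holding the lock with interrupts disabled keeps the mp->next chain and each module's table stable for the duration of the search, and the single unlock site is why the early returns became a break.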
Index: fault.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/s390x/mm/fault.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- fault.c 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ fault.c 9 Apr 2002 17:03:18 -0000 1.2
@@ -4,6 +4,7 @@
* S390 version
* Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Hartmut Penner (hp...@de...)
+ * Ulrich Weigand (uwe...@de...)
*
* Derived from "arch/i386/mm/fault.c"
* Copyright (C) 1995 Linus Torvalds
@@ -21,6 +22,8 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/console.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -32,6 +35,34 @@
#endif
extern void die(const char *,struct pt_regs *,long);
+static void force_sigsegv(struct task_struct *tsk, int code, void *address);
+
+extern spinlock_t timerlist_lock;
+
+/*
+ * Unlock any spinlocks which will prevent us from getting the
+ * message out (timerlist_lock is acquired through the
+ * console unblank code)
+ */
+void bust_spinlocks(int yes)
+{
+ spin_lock_init(&timerlist_lock);
+ if (yes) {
+ oops_in_progress = 1;
+ } else {
+ int loglevel_save = console_loglevel;
+ oops_in_progress = 0;
+ console_unblank();
+ /*
+ * OK, the message is on the console. Now we call printk()
+ * without oops_in_progress set so that printk will give klogd
+ * a poke. Hold onto your hats...
+ */
+ console_loglevel = 15;
+ printk(" ");
+ console_loglevel = loglevel_save;
+ }
+}
/*
* This routine handles page faults. It determines the address,
@@ -52,18 +83,31 @@
unsigned long address;
unsigned long fixup;
int write;
- unsigned long psw_mask;
- unsigned long psw_addr;
int si_code = SEGV_MAPERR;
int kernel_address = 0;
- /*
- * get psw mask of Program old psw to find out,
- * if user or kernel mode
- */
+ tsk = current;
+ mm = tsk->mm;
+
+ /*
+ * Check for low-address protection. This needs to be treated
+ * as a special case because the translation exception code
+ * field is not guaranteed to contain valid data in this case.
+ */
+ if ((error_code & 0xff) == 4 && !(S390_lowcore.trans_exc_code & 4)) {
- psw_mask = S390_lowcore.program_old_psw.mask;
- psw_addr = S390_lowcore.program_old_psw.addr;
+ /* Low-address protection hit in kernel mode means
+ NULL pointer write access in kernel mode. */
+ if (!(regs->psw.mask & PSW_PROBLEM_STATE)) {
+ address = 0;
+ kernel_address = 1;
+ goto no_context;
+ }
+
+ /* Low-address protection hit in user mode 'cannot happen'. */
+ die ("Low-address protection", regs, error_code);
+ do_exit(SIGKILL);
+ }
/*
* get the failing address
@@ -73,11 +117,6 @@
address = S390_lowcore.trans_exc_code&-4096L;
- tsk = current;
- mm = tsk->mm;
-
- if (in_interrupt() || !mm)
- goto no_context;
/*
* Check which address space the address belongs to
@@ -108,6 +147,7 @@
}
}
die("page fault via unknown access register", regs, error_code);
+ do_exit(SIGKILL);
break;
case 2: /* Secondary Segment Table Descriptor */
@@ -116,19 +156,25 @@
break;
}
+ /*
+ * Check whether we have a user MM in the first place.
+ */
+ if (in_interrupt() || !mm || !(regs->psw.mask & _PSW_IO_MASK_BIT))
+ goto no_context;
/*
* When we get here, the fault happened in the current
- * task's user address space, so we search the VMAs
+ * task's user address space, so we can switch on the
+ * interrupts again and then search the VMAs
*/
- down(&mm->mmap_sem);
+ __sti();
+
+ down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
- if (!vma) {
- printk("no vma for address %lX\n",address);
+ if (!vma)
goto bad_area;
- }
if (vma->vm_start <= address)
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
@@ -158,6 +204,7 @@
goto bad_area;
}
+ survive:
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
@@ -176,7 +223,7 @@
goto out_of_memory;
}
- up(&mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return;
/*
@@ -184,11 +231,10 @@
* Fix it, but check if it's kernel or user first..
*/
bad_area:
- up(&mm->mmap_sem);
+ up_read(&mm->mmap_sem);
/* User mode accesses just cause a SIGSEGV */
- if (psw_mask & PSW_PROBLEM_STATE) {
- struct siginfo si;
+ if (regs->psw.mask & PSW_PROBLEM_STATE) {
tsk->thread.prot_addr = address;
tsk->thread.trap_no = error_code;
#ifndef CONFIG_SYSCTL
@@ -205,10 +251,8 @@
show_regs(regs);
}
#endif
- si.si_signo = SIGSEGV;
- si.si_code = si_code;
- si.si_addr = (void*) address;
- force_sig_info(SIGSEGV, &si, tsk);
+
+ force_sigsegv(tsk, si_code, (void *)address);
return;
}
@@ -223,6 +267,7 @@
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
+
if (kernel_address)
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
" at virtual kernel address %016lx\n", address);
@@ -230,10 +275,6 @@
printk(KERN_ALERT "Unable to handle kernel paging request"
" at virtual user address %016lx\n", address);
-/*
- * need to define, which information is useful here
- */
-
die("Oops", regs, error_code);
do_exit(SIGKILL);
@@ -243,14 +284,20 @@
* us unable to handle the page fault gracefully.
*/
out_of_memory:
- up(&mm->mmap_sem);
+ up_read(&mm->mmap_sem);
+ if (tsk->pid == 1) {
+ tsk->policy |= SCHED_YIELD;
+ schedule();
+ down_read(&mm->mmap_sem);
+ goto survive;
+ }
printk("VM: killing process %s\n", tsk->comm);
- if (psw_mask & PSW_PROBLEM_STATE)
+ if (regs->psw.mask & PSW_PROBLEM_STATE)
do_exit(SIGKILL);
goto no_context;
do_sigbus:
- up(&mm->mmap_sem);
+ up_read(&mm->mmap_sem);
/*
* Send a sigbus, regardless of whether we were in kernel
@@ -261,6 +308,158 @@
force_sig(SIGBUS, tsk);
/* Kernel mode? Handle exceptions or die */
- if (!(psw_mask & PSW_PROBLEM_STATE))
+ if (!(regs->psw.mask & PSW_PROBLEM_STATE))
goto no_context;
}
+
+/*
+ * Send SIGSEGV to task. This is an external routine
+ * to keep the stack usage of do_page_fault small.
+ */
+static void force_sigsegv(struct task_struct *tsk, int code, void *address)
+{
+ struct siginfo si;
+ si.si_signo = SIGSEGV;
+ si.si_code = code;
+ si.si_addr = address;
+ force_sig_info(SIGSEGV, &si, tsk);
+}
+
+
+#ifdef CONFIG_PFAULT
+/*
+ * 'pfault' pseudo page faults routines.
+ */
+static int pfault_disable = 0;
+
+static int __init nopfault(char *str)
+{
+ pfault_disable = 1;
+ return 1;
+}
+
+__setup("nopfault", nopfault);
+
+typedef struct {
+ __u16 refdiagc;
+ __u16 reffcode;
+ __u16 refdwlen;
+ __u16 refversn;
+ __u64 refgaddr;
+ __u64 refselmk;
+ __u64 refcmpmk;
+ __u64 reserved;
+} __attribute__ ((packed)) pfault_refbk_t;
+
+typedef struct _pseudo_wait_t {
+ struct _pseudo_wait_t *next;
+ wait_queue_head_t queue;
+ unsigned long address;
+ int resolved;
+} pseudo_wait_t;
+
+int pfault_init(void)
+{
+ pfault_refbk_t refbk =
+ { 0x258, 0, 5, 2, __LC_KERNEL_STACK, 1ULL << 48, 1ULL << 48,
+ 0x8000000000000000ULL };
+ int rc;
+
+ if (pfault_disable)
+ return -1;
+ __asm__ __volatile__(
+ " diag %1,%0,0x258\n"
+ "0: j 2f\n"
+ "1: la %0,8\n"
+ "2:\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .quad 0b,1b\n"
+ ".previous"
+ : "=d" (rc) : "a" (&refbk) : "cc" );
+ __ctl_set_bit(0, 9);
+ return rc;
+}
+
+void pfault_fini(void)
+{
+ pfault_refbk_t refbk =
+ { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };
+
+ if (pfault_disable)
+ return;
+ __ctl_clear_bit(0, 9);
+ __asm__ __volatile__(
+ " diag %0,0,0x258\n"
+ "0:\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .quad 0b,0b\n"
+ ".previous"
+ : : "a" (&refbk) : "cc" );
+}
+
+asmlinkage void
+pfault_interrupt(struct pt_regs *regs, __u16 error_code)
+{
+ struct task_struct *tsk;
+ wait_queue_head_t queue;
+ wait_queue_head_t *qp;
+ __u16 subcode;
+
+ /*
+ * Get the external interruption subcode & pfault
+ * initial/completion signal bit. VM stores this
+ * in the 'cpu address' field associated with the
+ * external interrupt.
+ */
+ subcode = S390_lowcore.cpu_addr;
+ if ((subcode & 0xff00) != 0x0600)
+ return;
+
+ /*
+ * Get the token (= address of kernel stack of affected task).
+ */
+ tsk = (struct task_struct *)
+ (*((unsigned long *) __LC_PFAULT_INTPARM) - THREAD_SIZE);
+
+ /*
+ * We got all needed information from the lowcore and can
+ * now safely switch on interrupts.
+ */
+ if (regs->psw.mask & PSW_PROBLEM_STATE)
+ __sti();
+
+ if (subcode & 0x0080) {
+ /* signal bit is set -> a page has been swapped in by VM */
+ qp = (wait_queue_head_t *)
+ xchg(&tsk->thread.pfault_wait, -1);
+ if (qp != NULL) {
+ /* Initial interrupt was faster than the completion
+ * interrupt. pfault_wait is valid. Set pfault_wait
+ * back to zero and wake up the process. This can
+ * safely be done because the task is still sleeping
+ * and can't produce new pfaults. */
+ tsk->thread.pfault_wait = 0ULL;
+ wake_up(qp);
+ }
+ } else {
+ /* signal bit not set -> a real page is missing. */
+ init_waitqueue_head (&queue);
+ qp = (wait_queue_head_t *)
+ xchg(&tsk->thread.pfault_wait, (addr_t) &queue);
+ if (qp != NULL) {
+ /* Completion interrupt was faster than the initial
+ * interrupt (swapped in a -1 for pfault_wait). Set
+ * pfault_wait back to zero and exit. This can be
+ * done safely because tsk is running in kernel
+ * mode and can't produce new pfaults. */
+ tsk->thread.pfault_wait = 0ULL;
+ }
+
+ /* go to sleep */
+ wait_event(queue, tsk->thread.pfault_wait == 0ULL);
+ }
+}
+#endif
+
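The xchg() handshake in pfault_interrupt() above is the core of the pseudo-page-fault protocol: the initial interrupt (a real page is missing) and the completion interrupt (VM has brought the page in) can arrive in either order, and one atomic exchange on pfault_wait decides which side cleans up. A minimal userspace analogue, using C11 atomics in place of the kernel's xchg() and wait queues (all names and the driver in main() are invented for illustration):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* 0 = idle, (uintptr_t)-1 = completion arrived first,
     * anything else = wait token left by the initial interrupt. */
    static atomic_uintptr_t pfault_wait;

    static void completion(void)    /* "signal bit set" path */
    {
            uintptr_t old = atomic_exchange(&pfault_wait, (uintptr_t)-1);
            if (old != 0) {
                    /* initial ran first: reset and wake the sleeper */
                    atomic_store(&pfault_wait, 0);
                    printf("completion: waking token %#lx\n", (unsigned long)old);
            }
            /* otherwise leave the -1 marker for the initial interrupt */
    }

    static void initial(uintptr_t token)    /* "signal bit clear" path */
    {
            uintptr_t old = atomic_exchange(&pfault_wait, token);
            if (old == (uintptr_t)-1) {
                    /* completion beat us: undo and do not sleep */
                    atomic_store(&pfault_wait, 0);
                    printf("initial: completion came first, not sleeping\n");
                    return;
            }
            printf("initial: parked with token %#lx\n", (unsigned long)token);
            /* the kernel sleeps here via wait_event(queue, pfault_wait == 0) */
    }

    int main(void)
    {
            initial(0x1000);        /* normal order: park, then wake */
            completion();
            completion();           /* reversed order: marker is left behind */
            initial(0x2000);        /* sees the -1 and skips the sleep */
            return 0;
    }

Either ordering leaves pfault_wait back at zero, which is exactly the invariant the kernel code relies on before the task can fault again.
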
Index: init.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/s390x/mm/init.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- init.c 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ init.c 9 Apr 2002 17:03:18 -0000 1.2
@@ -35,143 +35,32 @@
#include <asm/pgalloc.h>
#include <asm/dma.h>
#include <asm/lowcore.h>
+#include <asm/tlb.h>
-static unsigned long totalram_pages;
+mmu_gather_t mmu_gathers[NR_CPUS];
-/*
- * empty_bad_page is the page that is used for page faults when linux
- * is out-of-memory. Older versions of linux just did a
- * do_exit(), but using this instead means there is less risk
- * for a process dying in kernel mode, possibly leaving an inode
- * unused etc..
- *
- * empty_bad_pte_table is the accompanying page-table: it is initialized
- * to point to BAD_PAGE entries.
- *
- * empty_bad_pmd_table is the accompanying segment table: it is initialized
- * to point to empty_bad_pte_table page tables.
- *
- * ZERO_PAGE is a special page that is used for zero-initialized
- * data and COW.
- */
+static unsigned long totalram_pages;
pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
-char empty_bad_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
-pmd_t empty_bad_pmd_table[PTRS_PER_PMD] __attribute__((__aligned__(PAGE_SIZE)));
-pte_t empty_bad_pte_table[PTRS_PER_PTE] __attribute__((__aligned__(PAGE_SIZE)));
-
-static int test_access(unsigned long loc)
-{
- static const int ssm_mask = 0x07000000L;
- int rc, i;
-
- rc = 0;
- for (i=0; i<2; i++) {
- __asm__ __volatile__(
- " slgr %0,%0\n"
- " ssm %1\n"
- " tprot 0(%2),0\n"
- "0: jne 1f\n"
- " lghi %0,1\n"
- "1: ssm %3\n"
- ".section __ex_table,\"a\"\n"
- " .align 8\n"
- " .quad 0b,1b\n"
- ".previous"
- : "+&d" (rc) : "i" (0), "a" (loc), "m" (ssm_mask)
- : "cc");
- if (rc == 0)
- break;
- loc += 0x100000;
- }
- return rc;
-}
-
-static pmd_t *get_bad_pmd_table(void)
-{
- pmd_t v;
- int i;
-
- pmd_set(&v, empty_bad_pte_table);
-
- for (i = 0; i < PTRS_PER_PMD; i++)
- empty_bad_pmd_table[i] = v;
-
- return empty_bad_pmd_table;
-}
-
-static pte_t *get_bad_pte_table(void)
-{
- pte_t v;
- int i;
-
- v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED));
-
- for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++)
- empty_bad_pte_table[i] = v;
-
- return empty_bad_pte_table;
-}
-
-pmd_t *
-get_pmd_slow(pgd_t *pgd, unsigned long offset)
-{
- pmd_t *pmd;
- int i;
-
- pmd = (pmd_t *) __get_free_pages(GFP_KERNEL,2);
- if (pgd_none(*pgd)) {
- if (pmd) {
- for (i = 0; i < PTRS_PER_PMD; i++)
- pmd_clear(pmd+i);
- pgd_set(pgd, pmd);
- return pmd + offset;
- }
- pmd = (pmd_t *) get_bad_pmd_table();
- pgd_set(pgd, pmd);
- return NULL;
- }
- free_pages((unsigned long)pmd,2);
- if (pgd_bad(*pgd))
- BUG();
- return (pmd_t *) pgd_page(*pgd) + offset;
-}
-
-pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
-{
- pte_t *pte;
- int i;
-
- pte = (pte_t*) __get_free_page(GFP_KERNEL);
- if (pmd_none(*pmd)) {
- if (pte) {
- for (i=0;i<PTRS_PER_PTE;i++)
- pte_clear(pte+i);
- pmd_set(pmd,pte);
- return pte + offset;
- }
- pte = (pte_t*) get_bad_pte_table();
- pmd_set(pmd,pte);
- return NULL;
- }
- free_page(__pa(pte));
- if (pmd_bad(*pmd))
- BUG();
- return (pte_t *) pmd_page(*pmd) + offset;
-}
int do_check_pgt_cache(int low, int high)
{
int freed = 0;
if(pgtable_cache_size > high) {
do {
- if(pgd_quicklist)
- free_pgd_slow(get_pgd_fast()), freed += 4;
- if(pmd_quicklist)
- free_pmd_slow(get_pmd_fast()), freed += 4;
- if(pte_quicklist)
- free_pte_slow(get_pte_fast()), freed++;
+ if(pgd_quicklist) {
+ free_pgd_slow(get_pgd_fast());
+ freed += 4;
+ }
+ if(pmd_quicklist) {
+ pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
+ freed += 4;
+ }
+ if(pte_quicklist) {
+ pte_free_slow(pte_alloc_one_fast(NULL, 0));
+ freed += 1;
+ }
} while(pgtable_cache_size > low);
}
return freed;
@@ -229,7 +118,7 @@
int i,j,k;
unsigned long address=0;
unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) |
- _REGION_TABLE;
+ _KERN_REGION_TABLE;
unsigned long end_mem = (unsigned long) __va(max_low_pfn*PAGE_SIZE);
static const int ssm_mask = 0x04000000L;
@@ -259,34 +148,34 @@
for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) {
if (address >= end_mem) {
- pgd_clear(pg_dir);
- continue;
+ pgd_clear(pg_dir);
+ continue;
}
pm_dir = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE*4);
- pgd_set(pg_dir,pm_dir);
+ pgd_populate(&init_mm, pg_dir, pm_dir);
for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) {
- if (address >= end_mem) {
- pmd_clear(pm_dir);
- continue;
- }
-
+ if (address >= end_mem) {
+ pmd_clear(pm_dir);
+ continue;
+ }
+
pt_dir = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- pmd_set(pm_dir,pt_dir);
+ pmd_populate(&init_mm, pm_dir, pt_dir);
for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) {
- pte = mk_pte_phys(address, PAGE_KERNEL);
- if (address >= end_mem) {
- pte_clear(&pte);
- continue;
- }
- set_pte(pt_dir, pte);
- address += PAGE_SIZE;
+ pte = mk_pte_phys(address, PAGE_KERNEL);
+ if (address >= end_mem) {
+ pte_clear(&pte);
+ continue;
+ }
+ set_pte(pt_dir, pte);
+ address += PAGE_SIZE;
}
}
}
-
+
/* enable virtual mapping in kernel mode */
__asm__ __volatile__("lctlg 1,1,%0\n\t"
"lctlg 7,7,%0\n\t"
@@ -302,7 +191,6 @@
void __init mem_init(void)
{
unsigned long codesize, reservedpages, datasize, initsize;
- unsigned long tmp;
max_mapnr = num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
@@ -313,25 +201,7 @@
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
- /* mark usable pages in the mem_map[] and count reserved pages */
reservedpages = 0;
- tmp = 0;
- do {
- if (tmp && (tmp & 0x1ff) == 0 &&
- test_access(tmp * PAGE_SIZE) == 0) {
- printk("2M Segment 0x%016lX not available\n",
- tmp * PAGE_SIZE);
- do {
- set_bit(PG_reserved, &mem_map[tmp].flags);
- reservedpages++;
- tmp++;
- } while (tmp < max_low_pfn && (tmp & 0x1ff));
- } else {
- if (PageReserved(mem_map+tmp))
- reservedpages++;
- tmp++;
- }
- } while (tmp < max_low_pfn);
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
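
Most of the pagetable_init() hunk above is re-indentation; the functional changes are _KERN_REGION_TABLE for the top-level table bits and the pgd_populate()/pmd_populate() helpers in place of pgd_set()/pmd_set(). The nested walk itself is easier to see stripped of the bootmem details; a toy userspace model (all dimensions and names invented, nothing here matches the real s390x layout):

    #include <stdio.h>
    #include <stdlib.h>

    #define TOP  4                  /* stands in for PTRS_PER_PGD */
    #define MID  4                  /* stands in for PTRS_PER_PMD */
    #define LEAF 4                  /* stands in for PTRS_PER_PTE */
    #define PAGE_SZ 0x1000UL

    int main(void)
    {
            void *top[TOP] = { 0 };                 /* the "pgd" */
            unsigned long address = 0;
            unsigned long end_mem = 30 * PAGE_SZ;   /* pretend RAM ends here */
            int mapped = 0;

            for (int i = 0; i < TOP; i++) {
                    if (address >= end_mem)
                            continue;               /* pgd_clear() in the kernel */
                    unsigned long **mid = calloc(MID, sizeof(*mid));
                    top[i] = mid;                   /* pgd_populate() */
                    for (int j = 0; j < MID; j++) {
                            if (address >= end_mem)
                                    continue;       /* pmd_clear() */
                            unsigned long *leaf = calloc(LEAF, sizeof(*leaf));
                            mid[j] = leaf;          /* pmd_populate() */
                            for (int k = 0; k < LEAF; k++) {
                                    if (address >= end_mem)
                                            continue;       /* pte_clear() */
                                    leaf[k] = address;      /* set_pte(): identity map */
                                    address += PAGE_SZ;
                                    mapped++;
                            }
                    }
            }
            printf("mapped %d of %lu pages\n", mapped, end_mem / PAGE_SZ);
            return 0;
    }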
Index: ioremap.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/s390x/mm/ioremap.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- ioremap.c 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ ioremap.c 9 Apr 2002 17:03:18 -0000 1.2
@@ -54,7 +54,7 @@
if (address >= end)
BUG();
do {
- pte_t * pte = pte_alloc_kernel(pmd, address);
+ pte_t * pte = pte_alloc(&init_mm, pmd, address);
if (!pte)
return -ENOMEM;
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -67,6 +67,7 @@
static int remap_area_pages(unsigned long address, unsigned long phys_addr,
unsigned long size, unsigned long flags)
{
+ int error;
pgd_t * dir;
unsigned long end = address + size;
@@ -75,17 +76,21 @@
flush_cache_all();
if (address >= end)
BUG();
+ spin_lock(&init_mm.page_table_lock);
do {
- pmd_t *pmd = pmd_alloc_kernel(dir, address);
+ pmd_t *pmd;
+ pmd = pmd_alloc(&init_mm, dir, address);
+ error = -ENOMEM;
if (!pmd)
- return -ENOMEM;
+ break;
if (remap_area_pmd(pmd, address, end - address,
phys_addr + address, flags))
- return -ENOMEM;
- set_pgdir(address, *dir);
+ break;
+ error = 0;
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
} while (address && (address < end));
+ spin_unlock(&init_mm.page_table_lock);
flush_tlb_all();
- return 0;
+ return error;
}
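
The restructuring above trades the early "return -ENOMEM" exits for break so that the new init_mm.page_table_lock is released on every path, which in turn means failures must be carried out of the loop in the error variable and handed back at the single exit. The same idiom reduced to a runnable toy (the mutex and allocator are stand-ins, not kernel API):

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Populate 'steps' entries; every failure path still crosses the
     * one unlock site at the bottom, as in remap_area_pages() above. */
    static int populate(int steps)
    {
            int error = 0;

            pthread_mutex_lock(&table_lock);
            for (int i = 0; i < steps; i++) {
                    void *entry = malloc(16);       /* stands in for pmd_alloc() */
                    error = -ENOMEM;
                    if (!entry)
                            break;                  /* no early return: fall to unlock */
                    free(entry);
                    error = 0;
            }
            pthread_mutex_unlock(&table_lock);      /* single exit, lock always dropped */
            return error;
    }

    int main(void)  /* build with: cc -pthread */
    {
            printf("populate -> %d\n", populate(4));
            return 0;
    }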