From: James S. <jsi...@us...> - 2001-10-29 00:11:31
|
Update of /cvsroot/linuxconsole/ruby/linux/arch/s390x/mm In directory usw-pr-cvs1:/tmp/cvs-serv10630/arch/s390x/mm Modified Files: fault.c Log Message: Synced to 2.4.13 Index: fault.c =================================================================== RCS file: /cvsroot/linuxconsole/ruby/linux/arch/s390x/mm/fault.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- fault.c 2001/09/02 23:14:23 1.1 +++ fault.c 2001/10/29 00:10:58 1.2 @@ -4,6 +4,7 @@ * S390 version * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation * Author(s): Hartmut Penner (hp...@de...) + * Ulrich Weigand (uwe...@de...) * * Derived from "arch/i386/mm/fault.c" * Copyright (C) 1995 Linus Torvalds @@ -21,6 +22,7 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/smp_lock.h> +#include <linux/init.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -32,6 +34,7 @@ #endif extern void die(const char *,struct pt_regs *,long); +static void force_sigsegv(struct task_struct *tsk, int code, void *address); /* * This routine handles page faults. It determines the address, @@ -52,19 +55,32 @@ unsigned long address; unsigned long fixup; int write; - unsigned long psw_mask; - unsigned long psw_addr; int si_code = SEGV_MAPERR; int kernel_address = 0; - /* - * get psw mask of Program old psw to find out, - * if user or kernel mode - */ + tsk = current; + mm = tsk->mm; + + /* + * Check for low-address protection. This needs to be treated + * as a special case because the translation exception code + * field is not guaranteed to contain valid data in this case. + */ + if ((error_code & 0xff) == 4 && !(S390_lowcore.trans_exc_code & 4)) { - psw_mask = S390_lowcore.program_old_psw.mask; - psw_addr = S390_lowcore.program_old_psw.addr; + /* Low-address protection hit in kernel mode means + NULL pointer write access in kernel mode. 
*/ + if (!(regs->psw.mask & PSW_PROBLEM_STATE)) { + address = 0; + kernel_address = 1; + goto no_context; + } + /* Low-address protection hit in user mode 'cannot happen'. */ + die ("Low-address protection", regs, error_code); + do_exit(SIGKILL); + } + /* * get the failing address * more specific the segment and page table portion of @@ -73,11 +89,6 @@ address = S390_lowcore.trans_exc_code&-4096L; - tsk = current; - mm = tsk->mm; - - if (in_interrupt() || !mm) - goto no_context; /* * Check which address space the address belongs to @@ -108,6 +119,7 @@ } } die("page fault via unknown access register", regs, error_code); + do_exit(SIGKILL); break; case 2: /* Secondary Segment Table Descriptor */ @@ -116,6 +128,11 @@ break; } + /* + * Check whether we have a user MM in the first place. + */ + if (in_interrupt() || !mm) + goto no_context; /* * When we get here, the fault happened in the current @@ -125,10 +142,8 @@ down_read(&mm->mmap_sem); vma = find_vma(mm, address); - if (!vma) { - printk("no vma for address %lX\n",address); + if (!vma) goto bad_area; - } if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) @@ -158,6 +173,7 @@ goto bad_area; } + survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -187,8 +203,7 @@ up_read(&mm->mmap_sem); /* User mode accesses just cause a SIGSEGV */ - if (psw_mask & PSW_PROBLEM_STATE) { - struct siginfo si; + if (regs->psw.mask & PSW_PROBLEM_STATE) { tsk->thread.prot_addr = address; tsk->thread.trap_no = error_code; #ifndef CONFIG_SYSCTL @@ -205,10 +220,8 @@ show_regs(regs); } #endif - si.si_signo = SIGSEGV; - si.si_code = si_code; - si.si_addr = (void*) address; - force_sig_info(SIGSEGV, &si, tsk); + + force_sigsegv(tsk, si_code, (void *)address); return; } @@ -223,6 +236,7 @@ * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. 
*/ + if (kernel_address) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " at virtual kernel address %016lx\n", address); @@ -230,10 +244,6 @@ printk(KERN_ALERT "Unable to handle kernel paging request" " at virtual user address %016lx\n", address); -/* - * need to define, which information is useful here - */ - die("Oops", regs, error_code); do_exit(SIGKILL); @@ -244,8 +254,14 @@ */ out_of_memory: up_read(&mm->mmap_sem); + if (tsk->pid == 1) { + tsk->policy |= SCHED_YIELD; + schedule(); + down_read(&mm->mmap_sem); + goto survive; + } printk("VM: killing process %s\n", tsk->comm); - if (psw_mask & PSW_PROBLEM_STATE) + if (regs->psw.mask & PSW_PROBLEM_STATE) do_exit(SIGKILL); goto no_context; @@ -261,6 +277,151 @@ force_sig(SIGBUS, tsk); /* Kernel mode? Handle exceptions or die */ - if (!(psw_mask & PSW_PROBLEM_STATE)) + if (!(regs->psw.mask & PSW_PROBLEM_STATE)) goto no_context; } + +/* + * Send SIGSEGV to task. This is an external routine + * to keep the stack usage of do_page_fault small. + */ +static void force_sigsegv(struct task_struct *tsk, int code, void *address) +{ + struct siginfo si; + si.si_signo = SIGSEGV; + si.si_code = code; + si.si_addr = address; + force_sig_info(SIGSEGV, &si, tsk); +} + + +#ifdef CONFIG_PFAULT +/* + * 'pfault' pseudo page faults routines. 
+ */ +static int pfault_disable = 0; + +static int __init nopfault(char *str) +{ + pfault_disable = 1; + return 1; +} + +__setup("nopfault", nopfault); + +typedef struct { + __u16 refdiagc; + __u16 reffcode; + __u16 refdwlen; + __u16 refversn; + __u64 refgaddr; + __u64 refselmk; + __u64 refcmpmk; + __u64 reserved; +} __attribute__ ((packed)) pfault_refbk_t; + +typedef struct _pseudo_wait_t { + struct _pseudo_wait_t *next; + wait_queue_head_t queue; + unsigned long address; + int resolved; +} pseudo_wait_t; + +int pfault_init(void) +{ + pfault_refbk_t refbk = + { 0x258, 0, 5, 2, __LC_KERNEL_STACK, 1ULL << 48, 1ULL << 48, + 0x8000000000000000ULL }; + int rc; + + if (pfault_disable) + return -1; + __asm__ __volatile__( + " diag %1,%0,0x258\n" + "0: j 2f\n" + "1: la %0,8\n" + "2:\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .quad 0b,1b\n" + ".previous" + : "=d" (rc) : "a" (&refbk) : "cc" ); + __ctl_set_bit(0, 9); + return rc; +} + +void pfault_fini(void) +{ + pfault_refbk_t refbk = + { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; + + if (pfault_disable) + return; + __ctl_clear_bit(0, 9); + __asm__ __volatile__( + " diag %0,0,0x258\n" + "0:\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .quad 0b,0b\n" + ".previous" + : : "a" (&refbk) : "cc" ); +} + +asmlinkage void +pfault_interrupt(struct pt_regs *regs, __u16 error_code) +{ + struct task_struct *tsk; + wait_queue_head_t queue; + wait_queue_head_t *qp; + __u16 subcode; + + /* + * Get the external interruption subcode & pfault + * initial/completion signal bit. VM stores this + * in the 'cpu address' field associated with the + * external interrupt. + */ + subcode = S390_lowcore.cpu_addr; + if ((subcode & 0xff00) != 0x0600) + return; + + /* + * Get the token (= address of kernel stack of affected task). 
+ */ + tsk = (struct task_struct *) + (*((unsigned long *) __LC_PFAULT_INTPARM) - THREAD_SIZE); + + if (subcode & 0x0080) { + /* signal bit is set -> a page has been swapped in by VM */ + qp = (wait_queue_head_t *) + xchg(&tsk->thread.pfault_wait, -1); + if (qp != NULL) { + /* Initial interrupt was faster than the completion + * interrupt. pfault_wait is valid. Set pfault_wait + * back to zero and wake up the process. This can + * safely be done because the task is still sleeping + * and can't produce new pfaults. */ + tsk->thread.pfault_wait = 0ULL; + wake_up(qp); + } + } else { + /* signal bit not set -> a real page is missing. */ + init_waitqueue_head (&queue); + qp = (wait_queue_head_t *) + xchg(&tsk->thread.pfault_wait, (addr_t) &queue); + if (qp != NULL) { + /* Completion interrupt was faster than the initial + * interrupt (swapped in a -1 for pfault_wait). Set + * pfault_wait back to zero and exit. This can be + * done safely because tsk is running in kernel + * mode and can't produce new pfaults. */ + tsk->thread.pfault_wait = 0ULL; + } + + /* go to sleep */ + wait_event(queue, tsk->thread.pfault_wait == 0ULL); + } +} +#endif + |