From: NIIBE Y. <gn...@m1...> - 2001-12-28 02:59:02
|
Here is an update of FPU patch, which includes fcnvsd exception fix by Kaz. Please note that we need new GCC to build the kernel with this patch. ftp://ftp.m17n.org/pub/linux-sh/testing/gcc-sh-linux_3.0.3-2.diff.gz For me, it works fine with the case of GNU Java. 2001-12-28 NIIBE Yutaka <gn...@m1...> (do_fpu_error): Handle denormalized floating point number. Remove un-needed grab_fpu. * arch/sh/kernel/head.S (_stext): Don't initialize FPU, as FPU is not used by kernel. (SR): Initial value with FD=1 (no FPU use). * include/asm-sh/processor.h (unlazy_fpu): Don't need to grab_fpu. (clear_fpu): Call release_fpu. * arch/sh/kernel/entry.S (PF_USEDFPU, __PF_USEDFPU): Removed. (restore_all, handle_exception): Simplified. (__fpu_prepare_fd, __init_task_flags): Removed * arch/sh/kernel/fpu.c (enable_fpu_in_danger): Removed. (fpu_prepare_fd): Removed. 2001-12-28 Kazumoto Kojima <kk...@rr...> * include/asm-sh/processor.h (FPSCR_ENABLE_MASK, FPSCR_FLAG_MASK): New macros. * arch/sh/kernel/fpu.c (denormal_to_double, ieee_fpe_handler): New functions. Index: arch/sh/kernel/entry.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/entry.S,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 entry.S --- arch/sh/kernel/entry.S 2001/10/15 20:44:47 1.1.1.1 +++ arch/sh/kernel/entry.S 2001/12/28 02:48:09 @@ -66,7 +66,6 @@ need_resched = 20 tsk_ptrace = 24 PT_TRACESYS = 0x00000002 -PF_USEDFPU = 0x00100000 ENOSYS = 38 EINVAL = 22 @@ -567,12 +566,6 @@ __irq_stat: .align 2 restore_all: -#if defined(__SH4__) - mov.l __fpu_prepare_fd, r0 - jsr @r0 - stc sr, r4 -#endif - ! mov.l @r15+, r0 mov.l @r15+, r1 mov.l @r15+, r2 @@ -585,7 +578,7 @@ restore_all: stc sr, r8 mov.l __blrb_flags, r9 ! BL =1, RB=1 or r9, r8 - ldc r8, sr ! here, change the register bank + ldc r8, sr ! here, change the register bank ! mov.l @r15+, r8 mov.l @r15+, r9 @@ -594,25 +587,25 @@ restore_all: mov.l @r15+, r12 mov.l @r15+, r13 mov.l @r15+, r14 - mov.l @r15+, k4 ! original stack pointer + mov.l @r15+, k4 ! original stack pointer ldc.l @r15+, spc lds.l @r15+, pr - mov.l @r15+, k3 ! original SR + mov.l @r15+, k3 ! original SR ldc.l @r15+, gbr lds.l @r15+, mach lds.l @r15+, macl - add #4, r15 ! Skip syscall number + add #4, r15 ! Skip syscall number ! ! Calculate new SR value - mov k3, k2 ! original SR value + mov k3, k2 ! original SR value mov.l 1f, k1 stc sr, k0 - and k1, k0 ! Get current FD-bit + and k1, k0 ! Get current FD-bit mov.l 2f, k1 - and k1, k2 ! Mask orignal SR value - or k0, k2 ! Inherit current FD-bit + and k1, k2 ! Mask orignal SR value + or k0, k2 ! Inherit current FD-bit ! - mov k3, k0 ! Calculate IMASK-bits + mov k3, k0 ! Calculate IMASK-bits shlr2 k0 and #0x3c, k0 cmp/eq #0x3c, k0 @@ -620,69 +613,15 @@ restore_all: shll2 k0 mov g_imask, k0 ! -7: or k0, k2 ! Set the IMASK-bits +7: or k0, k2 ! Set the IMASK-bits ldc k2, ssr ! -#if defined(__SH4__) - shll k2 - shll k2 - bf 9f ! user mode - /* Kernel to kernel transition */ - mov.l 1f, k1 - tst k1, k3 - bf 9f ! it hadn't FPU - ! Kernel to kernel and FPU was used - ! There's the case we don't get FPU now - stc sr, k2 - tst k1, k2 - bt 8f - ! We need to grab FPU here - xor k1, k2 - ldc k2, sr ! Grab FPU - mov.l __init_task_flags, k1 - mov.l @k1, k2 - mov.l __PF_USEDFPU, k0 - or k0, k2 - mov.l k2, @k1 ! Set init_task.flags |= PF_USEDFPU - ! - ! Restoring FPU... - ! -8: mov.l 3f, k1 - lds k1, fpscr - fmov.s @r15+, fr0 - fmov.s @r15+, fr1 - fmov.s @r15+, fr2 - fmov.s @r15+, fr3 - fmov.s @r15+, fr4 - fmov.s @r15+, fr5 - fmov.s @r15+, fr6 - fmov.s @r15+, fr7 - fmov.s @r15+, fr8 - fmov.s @r15+, fr9 - fmov.s @r15+, fr10 - fmov.s @r15+, fr11 - fmov.s @r15+, fr12 - fmov.s @r15+, fr13 - fmov.s @r15+, fr14 - fmov.s @r15+, fr15 - lds.l @r15+, fpscr - lds.l @r15+, fpul -9: -#endif mov k4, r15 rte nop .align 2 __blrb_flags: .long 0x30000000 -#if defined(__SH4__) -__fpu_prepare_fd: - .long SYMBOL_NAME(fpu_prepare_fd) -__init_task_flags: - .long SYMBOL_NAME(init_task_union)+4 -__PF_USEDFPU: - .long PF_USEDFPU -#endif 1: .long 0x00008000 ! FD 2: .long 0xffff7f0f ! ~(IMASK+FD) 3: .long 0x00080000 ! SZ=0, PR=1 @@ -732,61 +671,21 @@ handle_exception: ! Using k0, k1 for scratch registers (r0_bank1, r1_bank), ! save all registers onto stack. ! - stc ssr, k0 ! from kernel space? - shll k0 ! Check MD bit (bit30) by shifting it into the T bit - shll k0 -#if defined(__SH4__) - bf/s 8f ! it's from user to kernel transition - mov r15, k0 ! save original stack to k0 - /* It's a kernel to kernel transition. */ - /* Is the FPU disabled? */ - mov.l 2f, k1 - stc ssr, k0 - tst k1, k0 - mov.l 4f, k1 - bf/s 9f ! FPU is not enabled, no need to save it - mov r15, k0 ! save original stack to k0 - ! FPU is enabled, save it - ! /* XXX: Need to save another bank of FPU if all FPU feature is used */ - ! /* Currently it's not the case for GCC (only udivsi3_i4, divsi3_i4) */ - sts.l fpul, @-r15 - sts.l fpscr, @-r15 - mov.l 6f, k1 - lds k1, fpscr - mov.l 3f, k1 - fmov.s fr15, @-r15 - fmov.s fr14, @-r15 - fmov.s fr13, @-r15 - fmov.s fr12, @-r15 - fmov.s fr11, @-r15 - fmov.s fr10, @-r15 - fmov.s fr9, @-r15 - fmov.s fr8, @-r15 - fmov.s fr7, @-r15 - fmov.s fr6, @-r15 - fmov.s fr5, @-r15 - fmov.s fr4, @-r15 - fmov.s fr3, @-r15 - fmov.s fr2, @-r15 - fmov.s fr1, @-r15 - bra 9f - fmov.s fr0, @-r15 -#else - mov.l 3f, k1 - bt/s 9f ! it's a kernel to kernel transition, and skip the FPU save. - mov r15, k0 ! save original stack to k0 anyway -#endif -8: /* User space to kernel */ + stc ssr, k0 ! Is it from kernel space? + shll k0 ! Check MD bit (bit30) by shifting it into... + shll k0 ! ...the T bit + bt/s 9f ! It's a kernel to kernel transition. + mov r15, k0 ! save original stack to k0 + /* User space to kernel */ mov #0x20, k1 - shll8 k1 ! k1 <= 8192 == THREAD_SIZE + shll8 k1 ! k1 <= 8192 == THREAD_SIZE add current, k1 mov k1, r15 ! change to kernel stack ! - mov.l 4f, k1 ! let kernel release FPU -9: ! Save the user registers on the stack. - ! At this point, k1 should have been set to the new SR value - mov #-1, k4 - mov.l k4, @-r15 ! syscall_nr (default: -1) +9: mov #-1, k4 + mov.l 3f, k1 + ! Save the user registers on the stack. + mov.l k4, @-r15 ! syscall_nr (default: -1) ! sts.l macl, @-r15 sts.l mach, @-r15 @@ -806,11 +705,11 @@ handle_exception: mov.l r9, @-r15 mov.l r8, @-r15 ! - stc sr, r8 ! Back to normal register bank, and - or k1, r8 ! Block all interrupts, may release FPU + stc sr, r8 ! Back to normal register bank, and + or k1, r8 ! Block all interrupts mov.l 5f, k1 - and k1, r8 ! ... - ldc r8, sr ! ...changed here. + and k1, r8 ! ... + ldc r8, sr ! ...changed here. ! mov.l r7, @-r15 mov.l r6, @-r15 @@ -831,9 +730,7 @@ handle_exception: nop .align 2 1: .long SYMBOL_NAME(exception_handling_table) -2: .long 0x00008000 ! FD=1 3: .long 0x000000f0 ! FD=0, IMASK=15 -4: .long 0x000080f0 ! FD=1, IMASK=15 5: .long 0xcfffffff ! RB=0, BL=0 6: .long 0x00080000 ! SZ=0, PR=1 Index: arch/sh/kernel/fpu.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/fpu.c,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 fpu.c --- arch/sh/kernel/fpu.c 2001/10/15 20:44:48 1.1.1.1 +++ arch/sh/kernel/fpu.c 2001/12/28 02:48:09 @@ -18,6 +18,10 @@ #include <asm/processor.h> #include <asm/io.h> +/* + * Save FPU registers onto task structure. + * Assume called with FPU enabled (SR.FD=0). + */ void save_fpu(struct task_struct *tsk) { @@ -118,7 +122,8 @@ restore_fpu(struct task_struct *tsk) * double precission represents signaling NANS. */ -void fpu_init(void) +static void +fpu_init(void) { asm volatile("lds %0, fpul\n\t" "lds %1, fpscr\n\t" @@ -160,15 +165,125 @@ void fpu_init(void) : "r" (0), "r" (FPSCR_INIT)); } +/** + * denormal_to_double - Given denormalized float number, + * store double float + * + * @fpu: Pointer to sh_fpu_hard structure + * @n: Index to FP register + */ +static void +denormal_to_double (struct sh_fpu_hard_struct *fpu, int n) +{ + unsigned long du, dl; + unsigned long x = fpu->fpul; + int exp = 1023 - 126; + + if (x != 0 && (x & 0x7f800000) == 0) { + du = (x & 0x80000000); + while ((x & 0x00800000) == 0) { + x <<= 1; + exp--; + } + x &= 0x007fffff; + du |= (exp << 20) | (x >> 3); + dl = x << 29; + + fpu->fp_regs[n] = du; + fpu->fp_regs[n+1] = dl; + } +} + +/** + * ieee_fpe_handler - Handle denormalized number exception + * + * @regs: Pointer to register structure + * + * Returns 1 when it's handled (should not cause exception). + */ +static int +ieee_fpe_handler (struct pt_regs *regs) +{ + unsigned short insn = *(unsigned short *) regs->pc; + unsigned short finsn; + unsigned long nextpc; + int nib[4] = { + (insn >> 12) & 0xf, + (insn >> 8) & 0xf, + (insn >> 4) & 0xf, + insn & 0xf}; + + if (nib[0] == 0xb || + (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ + regs->pr = regs->pc + 4; + + if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ + nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); + finsn = *(unsigned short *) (regs->pc + 2); + } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ + if (regs->sr & 1) + nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); + else + nextpc = regs->pc + 4; + finsn = *(unsigned short *) (regs->pc + 2); + } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ + if (regs->sr & 1) + nextpc = regs->pc + 4; + else + nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); + finsn = *(unsigned short *) (regs->pc + 2); + } else if (nib[0] == 0x4 && nib[3] == 0xb && + (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ + nextpc = regs->regs[nib[1]]; + finsn = *(unsigned short *) (regs->pc + 2); + } else if (nib[0] == 0x0 && nib[3] == 0x3 && + (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ + nextpc = regs->pc + 4 + regs->regs[nib[1]]; + finsn = *(unsigned short *) (regs->pc + 2); + } else if (insn == 0x000b) { /* rts */ + nextpc = regs->pr; + finsn = *(unsigned short *) (regs->pc + 2); + } else { + nextpc = regs->pc + 2; + finsn = insn; + } + + if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ + struct task_struct *tsk = current; + + save_fpu(tsk); + if ((tsk->thread.fpu.hard.fpscr & (1 << 17))) { + /* FPU error */ + denormal_to_double (&tsk->thread.fpu.hard, + (finsn >> 8) & 0xf); + tsk->thread.fpu.hard.fpscr &= + ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); + grab_fpu(); + restore_fpu(tsk); + tsk->flags |= PF_USEDFPU; + } else { + tsk->thread.trap_no = 11; + tsk->thread.error_code = 0; + force_sig(SIGFPE, tsk); + } + + regs->pc = nextpc; + return 1; + } + + return 0; +} + asmlinkage void do_fpu_error(unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7, struct pt_regs regs) { struct task_struct *tsk = current; - regs.pc += 2; + if (ieee_fpe_handler (®s)) + return; - grab_fpu(); + regs.pc += 2; save_fpu(tsk); tsk->thread.trap_no = 11; tsk->thread.error_code = 0; @@ -181,102 +296,12 @@ do_fpu_state_restore(unsigned long r4, u { struct task_struct *tsk = current; - if (!user_mode(®s)) { - if (tsk != &init_task) { - unlazy_fpu(tsk); - } - tsk = &init_task; - if (tsk->flags & PF_USEDFPU) { - /* - * This weird situation can be occurred. - * - * There's race condition in __cli: - * - * (1) SR --> register - * (2) Set IMASK of register - * (3) SR <-- register - * - * Between (1) and (2), or (2) and (3) getting - * interrupt, and interrupt handler (or - * softirq) may use FPU. - * - * Then, SR.FD is overwritten by (3). - * - * This results init_task.PF_USEDFPU is on, - * with SR.FD == 1. - * - */ - release_fpu(); - return; - } - } - grab_fpu(); - if (tsk->used_math) { - /* Using the FPU again. */ - restore_fpu(tsk); - } else { - /* First time FPU user. */ - fpu_init(); - tsk->used_math = 1; - } - tsk->flags |= PF_USEDFPU; - release_fpu(); -} - -/* - * Change current FD flag to set FD flag back to exception - */ -asmlinkage void -fpu_prepare_fd(unsigned long sr, unsigned long r5, unsigned long r6, - unsigned long r7, struct pt_regs regs) -{ - __cli(); if (!user_mode(®s)) { - if (init_task.flags & PF_USEDFPU) - grab_fpu(); - else { - if (!(sr & SR_FD)) { - BUG(); - release_fpu(); - } - } + printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); return; } - if (sr & SR_FD) { /* Kernel doesn't grab FPU */ - if (current->flags & PF_USEDFPU) - grab_fpu(); - else { - if (init_task.flags & PF_USEDFPU) { - /* - * This weird situation can be occurred. - * See the comment in do_fpu_state_restore. - */ - grab_fpu(); - save_fpu(&init_task); - } - } - } else { - if (init_task.flags & PF_USEDFPU) - save_fpu(&init_task); - else { - BUG(); - release_fpu(); - } - } -} - -/* Short cut for the FPU exception */ -asmlinkage void -enable_fpu_in_danger(void) -{ - struct task_struct *tsk = current; - - if (tsk != &init_task) - unlazy_fpu(tsk); - - tsk = &init_task; if (tsk->used_math) { /* Using the FPU again. */ restore_fpu(tsk); Index: arch/sh/kernel/head.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/head.S,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 head.S --- arch/sh/kernel/head.S 2001/10/15 20:44:48 1.1.1.1 +++ arch/sh/kernel/head.S 2001/12/28 02:48:09 @@ -50,12 +50,6 @@ ENTRY(_stext) sub r1, r0 ! ldc r0, r7_bank ! ... and init_task ! -#if defined(__SH4__) - ! Initialize fpu - mov.l 7f, r0 - jsr @r0 - nop -#endif ! Enable cache mov.l 6f, r0 jsr @r0 @@ -74,12 +68,9 @@ ENTRY(_stext) nop .balign 4 -1: .long 0x400000F0 ! MD=1, RB=0, BL=0, FD=0, IMASK=0xF +1: .long 0x400080F0 ! MD=1, RB=0, BL=0, FD=1, IMASK=0xF 2: .long SYMBOL_NAME(stack) 3: .long SYMBOL_NAME(__bss_start) 4: .long SYMBOL_NAME(_end) 5: .long SYMBOL_NAME(start_kernel) 6: .long SYMBOL_NAME(cache_init) -#if defined(__SH4__) -7: .long SYMBOL_NAME(fpu_init) -#endif Index: include/asm-sh/processor.h =================================================================== RCS file: /cvsroot/linuxsh/linux/include/asm-sh/processor.h,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 processor.h --- include/asm-sh/processor.h 2001/10/15 20:45:10 1.1.1.1 +++ include/asm-sh/processor.h 2001/12/28 02:48:09 @@ -184,18 +184,22 @@ extern void save_fpu(struct task_struct #define unlazy_fpu(tsk) do { \ if ((tsk)->flags & PF_USEDFPU) { \ - grab_fpu(); \ save_fpu(tsk); \ } \ } while (0) #define clear_fpu(tsk) do { \ - if ((tsk)->flags & PF_USEDFPU) \ + if ((tsk)->flags & PF_USEDFPU) { \ (tsk)->flags &= ~PF_USEDFPU; \ + release_fpu(); \ + } \ } while (0) /* Double presision, NANS as NANS, rounding to nearest, no exceptions */ #define FPSCR_INIT 0x00080000 + +#define FPSCR_CAUSE_MASK 0x0001f000 /* Cause bits */ +#define FPSCR_FLAG_MASK 0x0000007c /* Flag bits */ /* * Return saved PC of a blocked thread. -- |