From: NIIBE Y. <gn...@m1...> - 2002-03-26 05:49:02
|
OK, I've fixed cache-sh4.c. The problem was confusion of virtual address and physical address. Note that new flush_cache_range is quite slow for exec. It's two to four times slower than simple flush_cache_all implementation. New flush_cache_range would be good if we want cache to have valid data (avoid flushing), but it takes a long time for SuperH to handle cache. 2002-03-26 NIIBE Yutaka <gn...@m1...> * arch/sh/mm/cache-sh4.c (flush_cache_mm): Don't check mm->context, it's for TLB handling. (flush_cache_range): Likewise. (flush_cache_mm): Fix the comment. The alias issue is there for write-through cache too. (flush_cache_range): Don't handle in P2. 2002-03-26 NIIBE Yutaka <gn...@m1...> * arch/sh/mm/cache-sh4.c (flush_cache_range): Bug fix. Handle the case where PMD is none or bad. The argument to __flush_icache_page/__flush_dcache_page is physical address (was: virtual address). Index: arch/sh/mm/cache-sh4.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/cache-sh4.c,v retrieving revision 1.6 diff -u -3 -p -r1.6 cache-sh4.c --- arch/sh/mm/cache-sh4.c 22 Mar 2002 12:57:10 -0000 1.6 +++ arch/sh/mm/cache-sh4.c 26 Mar 2002 05:43:03 -0000 @@ -298,15 +298,13 @@ void flush_cache_mm(struct mm_struct *mm * FIXME: Really, the optimal solution here would be able to flush out * individual lines created by the specified context, but this isn't * feasible for a number of architectures (such as MIPS, and some - * SPARC) .. is this possible for SuperH? (This is a non-issue if the - * SH4 cache is configured in write-through mode). + * SPARC) .. is this possible for SuperH? * - * In the meantime, we'll just flush all of the caches if we have a - * valid mm context.. this seems to be the simplest way to avoid at - * least a few wasted cache flushes. -Lethal + * In the meantime, we'll just flush all of the caches.. this + * seems to be the simplest way to avoid at least a few wasted + * cache flushes. 
-Lethal */ - if (mm->context != 0) - flush_cache_all(); + flush_cache_all(); } /* @@ -324,35 +322,28 @@ void flush_cache_range(struct vm_area_st unsigned long flags; struct mm_struct *mm = vma->vm_mm; - if (mm->context == 0) - return; - start &= PAGE_MASK; - if (mm->context != current->active_mm->context) { - flush_cache_all(); - } else { - pgd_t *pgd; - pmd_t *pmd; + save_and_cli(flags); + for (; start < end; start += PAGE_SIZE) { + pgd_t *pgd = pgd_offset(mm, start); + pmd_t *pmd = pmd_offset(pgd, start); pte_t *pte; + unsigned long phys; - save_and_cli(flags); - jump_to_P2(); - - for (start; start < end; start += PAGE_SIZE) { - pgd = pgd_offset(mm, start); - pmd = pmd_offset(pgd, start); - pte = pte_offset_kernel(pmd, start); - - if (pte_val(*pte) & _PAGE_PRESENT) { - __flush_icache_page(start); - __flush_dcache_page(start); - } + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + start &= ~((1 << PMD_SHIFT) -1); + start += (1 << PMD_SHIFT); + continue; + } + pte = pte_offset_kernel(pmd, start); + phys = pte_val(*pte)&PTE_PHYS_MASK; + if (pte_val(*pte) & _PAGE_PRESENT) { + __flush_icache_page(phys); + __flush_dcache_page(phys); } - - back_to_P1(); - restore_flags(flags); } + restore_flags(flags); } /* |
From: NIIBE Y. <gn...@m1...> - 2002-03-27 05:56:00
|
Optimized the flushing. There is a requirement in SH4 to control cache from P2 area. P2 area is the area where cache is not used at all. This means, the cycles for instruction could be quite large. In other word, if we could save number of instructions, we see performance improvement. I've done: Four --> Three instruction fetch per single cache line flush. 2002-03-27 NIIBE Yutaka <gn...@m1...> * arch/sh/mm/copy_page.S: File merged with __copy_user_page-sh4.S. * arch/sh/mm/clear_page.S: File merged with __clear_user.S. (__flush_cache_4096): New function. * arch/sh/mm/Makefile: Remove __copy_user_page-sh4.S and __clear_user.S. * arch/sh/mm/cache-sh4.c (__flush_icache_page): Removed. (flush_cache_4096): New function. 2002-03-27 NIIBE Yutaka <gn...@m1...> * arch/sh/mm/cache-sh4.c (__flush_cache_page): New function. (flush_cache_range, flush_cache_page, flush_icache_user_range): Use __flush_cache_page. Index: arch/sh/mm/Makefile =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/Makefile,v retrieving revision 1.2 diff -u -3 -p -r1.2 Makefile --- arch/sh/mm/Makefile 26 Mar 2002 01:56:39 -0000 1.2 +++ arch/sh/mm/Makefile 27 Mar 2002 05:15:07 -0000 @@ -10,8 +10,8 @@ O_TARGET := mm.o obj-y := init.o fault.o extable.o clear_page.o copy_page.o -obj-$(CONFIG_CPU_SH3) += cache-sh3.o __clear_user.o -obj-$(CONFIG_CPU_SH4) += cache-sh4.o __clear_user.o __copy_user_page-sh4.o ioremap.o +obj-$(CONFIG_CPU_SH3) += cache-sh3.o +obj-$(CONFIG_CPU_SH4) += cache-sh4.o ioremap.o USE_STANDARD_AS_RULE := true Index: arch/sh/mm/__clear_user.S =================================================================== RCS file: arch/sh/mm/__clear_user.S diff -N arch/sh/mm/__clear_user.S --- arch/sh/mm/__clear_user.S 26 Mar 2002 01:56:39 -0000 1.1 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,138 +0,0 @@ -/* $Id: __clear_user.S,v 1.1 2002/03/26 01:56:39 gniibe Exp $ - * - * __clear_user_page implementation of SuperH - * - * Copyright (C) 2001, 
2002 Niibe Yutaka & Kaz Kojima - * - */ - -#if defined(__SH4__) -/* - * __clear_user_page - * @to: P1 address (with same color) - * @orig_to: P1 address - * - * void __clear_user_page(void *to, void *orig_to) - */ - -/* - * r0 --- scratch - * r4 --- to - * r5 --- orig_to - * r6 --- to + 4096 - */ -#include <linux/linkage.h> -ENTRY(__clear_user_page) - mov.w .L4096,r0 - mov r4,r6 - add r0,r6 - mov #0,r0 - ! -1: ocbi @r5 - add #32,r5 - movca.l r0,@r4 - mov r4,r1 - add #32,r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - mov.l r0,@-r4 - add #28,r4 - cmp/eq r6,r4 - bf/s 1b - ocbwb @r1 - ! - rts - nop -.L4096: .word 4096 -#endif - -ENTRY(__clear_user) - ! - mov #0, r0 - mov #0xe0, r1 ! 0xffffffe0 - ! - ! r4..r4&~32 -------- not aligned [ Area 0 ] - ! r4&~32..(r4+r5)&~32 -------- aligned [ Area 1 ] - ! (r4+r5)&~32..r4+r5 -------- not aligned [ Area 2 ] - ! - ! Clear area 0 - mov r4, r2 - and r1, r2 - cmp/eq r4, r2 - bt/s area1 - mov r4, r3 - sub r2, r3 - mov r4, r2 - ! -l0: dt r3 -0: mov.b r0, @r2 - bf/s l0 - add #1, r2 - ! - mov r4, r3 - add r5, r3 - and r1, r3 - ! - ! Clear area 1 -area1: -#if defined(__SH4__) -1: movca.l r0, @r2 -#else -1: mov.l r0, @r2 -#endif - add #4, r2 -2: mov.l r0, @r2 - add #4, r2 -3: mov.l r0, @r2 - add #4, r2 -4: mov.l r0, @r2 - add #4, r2 -5: mov.l r0, @r2 - add #4, r2 -6: mov.l r0, @r2 - add #4, r2 -7: mov.l r0, @r2 - add #4, r2 -8: mov.l r0, @r2 - add #4, r2 - cmp/hi r2, r3 - bt/s 1b - nop - ! - ! Clear area 2 - add r5, r4 - cmp/eq r4, r2 - bt/s done - sub r2, r4 -l2: dt r4 -9: mov.b r0, @r2 - bf/s l2 - add #1, r2 - ! -done: rts - nop ! return 0 as normal return - - ! 
return the number of bytes remained -bad_clear_user: - mov r4, r0 - mov r5, r0 - rts - sub r2, r0 - -.section __ex_table,"a" - .align 2 - .long 0b, bad_clear_user - .long 1b, bad_clear_user - .long 2b, bad_clear_user - .long 3b, bad_clear_user - .long 4b, bad_clear_user - .long 5b, bad_clear_user - .long 6b, bad_clear_user - .long 7b, bad_clear_user - .long 8b, bad_clear_user - .long 9b, bad_clear_user -.previous Index: arch/sh/mm/__copy_user_page-sh4.S =================================================================== RCS file: arch/sh/mm/__copy_user_page-sh4.S diff -N arch/sh/mm/__copy_user_page-sh4.S --- arch/sh/mm/__copy_user_page-sh4.S 15 Oct 2001 20:44:53 -0000 1.1.1.1 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,69 +0,0 @@ -/* $Id: __copy_user_page-sh4.S,v 1.1.1.1 2001/10/15 20:44:53 mrbrown Exp $ - * - * __copy_user_page implementation of SuperH - * - * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima - * - */ - -/* - * __copy_user_page - * @to: P1 address (with same color) - * @from: P1 address - * @orig_to: P1 address - * - * void __copy_user_page(void *to, void *from, void *orig_to) - */ - -/* - * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch - * r8 --- from + 4096 - * r9 --- orig_to - * r10 --- to - * r11 --- from - */ -#include <linux/linkage.h> -ENTRY(__copy_user_page) - mov.l r8,@-r15 - mov.l r9,@-r15 - mov.l r10,@-r15 - mov.l r11,@-r15 - mov r4,r10 - mov r5,r11 - mov r6,r9 - mov r5,r8 - mov.w .L4096,r0 - add r0,r8 - ! -1: ocbi @r9 - add #32,r9 - mov.l @r11+,r0 - mov.l @r11+,r1 - mov.l @r11+,r2 - mov.l @r11+,r3 - mov.l @r11+,r4 - mov.l @r11+,r5 - mov.l @r11+,r6 - mov.l @r11+,r7 - movca.l r0,@r10 - mov r10,r0 - add #32,r10 - mov.l r7,@-r10 - mov.l r6,@-r10 - mov.l r5,@-r10 - mov.l r4,@-r10 - mov.l r3,@-r10 - mov.l r2,@-r10 - mov.l r1,@-r10 - ocbwb @r0 - cmp/eq r11,r8 - bf/s 1b - add #28,r10 - ! 
- mov.l @r15+,r11 - mov.l @r15+,r10 - mov.l @r15+,r9 - mov.l @r15+,r8 - rts - nop -.L4096: .word 4096 Index: arch/sh/mm/cache-sh4.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/cache-sh4.c,v retrieving revision 1.7 diff -u -3 -p -r1.7 cache-sh4.c --- arch/sh/mm/cache-sh4.c 27 Mar 2002 00:07:19 -0000 1.7 +++ arch/sh/mm/cache-sh4.c 27 Mar 2002 05:15:07 -0000 @@ -208,54 +208,40 @@ void flush_cache_sigtramp(unsigned long restore_flags(flags); } +static inline void flush_cache_4096(unsigned long start, + unsigned long phys) +{ + register unsigned long addr __asm__ ("r4"); + register unsigned long data __asm__ ("r0"); + register unsigned long __r5 __asm__ ("r5") = phys; + register unsigned long __r6 __asm__ ("r6") = (0x1ffff000|CACHE_VALID); + register unsigned long __r7 __asm__ ("r7") = 0; + extern void __flush_cache_4096(unsigned long, unsigned long); + + asm volatile("jsr @%1; nop" + : "=r" (addr), "=r" (data) + : "0" (start), "1" (__flush_cache_4096 + 0x20000000), + "r" (__r5), "r" (__r6), "r" (__r7) + : "pr"); +} + /* * Writeback&Invalidate the D-cache of the page */ static void __flush_dcache_page(unsigned long phys) { - unsigned long addr, data; unsigned long flags; phys |= CACHE_VALID; save_and_cli(flags); - jump_to_P2(); /* Loop all the D-cache */ - for (addr = CACHE_OC_ADDRESS_ARRAY; - addr < (CACHE_OC_ADDRESS_ARRAY - +(CACHE_OC_NUM_ENTRIES<< CACHE_OC_ENTRY_SHIFT)); - addr += (1<<CACHE_OC_ENTRY_SHIFT)) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } - - back_to_P1(); - restore_flags(flags); -} - -static void __flush_icache_page(unsigned long phys) -{ - unsigned long addr, data; - unsigned long flags; - - phys |= CACHE_VALID; - - save_and_cli(flags); - jump_to_P2(); + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY, phys); + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys); + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, phys); + 
flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys); - /* Loop all the I-cache */ - for (addr = CACHE_IC_ADDRESS_ARRAY; - addr < (CACHE_IC_ADDRESS_ARRAY - +(CACHE_IC_NUM_ENTRIES<< CACHE_IC_ENTRY_SHIFT)); - addr += (1<<CACHE_IC_ENTRY_SHIFT)) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } - - back_to_P1(); restore_flags(flags); } @@ -307,6 +293,36 @@ void flush_cache_mm(struct mm_struct *mm flush_cache_all(); } +static void __flush_cache_page(struct vm_area_struct *vma, + unsigned long address, + unsigned long phys) +{ + unsigned long flags; + + phys |= CACHE_VALID; + save_and_cli(flags); + + /* We only need to flush D-cache when we have alias */ + if ((address^phys) & CACHE_ALIAS) { + /* Loop 4K of the D-cache */ + flush_cache_4096( + CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS), + phys); + /* Loop another 4K of the D-cache */ + flush_cache_4096( + CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS), + phys); + } + + if (vma->vm_flags & VM_EXEC) + /* Loop 4K (half) of the I-cache */ + flush_cache_4096( + CACHE_IC_ADDRESS_ARRAY | (address & 0x1000), + phys); + + restore_flags(flags); +} + /* * Write back and invalidate D-caches. 
* @@ -319,31 +335,35 @@ void flush_cache_mm(struct mm_struct *mm void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long flags; - struct mm_struct *mm = vma->vm_mm; + unsigned long p = start & PAGE_MASK; + pgd_t *dir; + pmd_t *pmd; + pte_t *pte; + pte_t entry; + unsigned long phys; - start &= PAGE_MASK; - - save_and_cli(flags); - for (; start < end; start += PAGE_SIZE) { - pgd_t *pgd = pgd_offset(mm, start); - pmd_t *pmd = pmd_offset(pgd, start); - pte_t *pte; - unsigned long phys; + dir = pgd_offset(vma->vm_mm, p); + pmd = pmd_offset(dir, p); + do { if (pmd_none(*pmd) || pmd_bad(*pmd)) { - start &= ~((1 << PMD_SHIFT) -1); - start += (1 << PMD_SHIFT); + p &= ~((1 << PMD_SHIFT) -1); + p += (1 << PMD_SHIFT); + pmd++; continue; } - pte = pte_offset_kernel(pmd, start); - phys = pte_val(*pte)&PTE_PHYS_MASK; - if (pte_val(*pte) & _PAGE_PRESENT) { - __flush_icache_page(phys); - __flush_dcache_page(phys); - } - } - restore_flags(flags); + pte = pte_offset_kernel(pmd, p); + do { + entry = *pte; + if ((pte_val(entry) & _PAGE_PRESENT)) { + phys = pte_val(entry)&PTE_PHYS_MASK; + __flush_cache_page(vma, p, phys); + } + pte++; + p += PAGE_SIZE; + } while (p < end && (unsigned long)pte & PAGE_MASK); + pmd++; + } while (p < end); } /* @@ -357,8 +377,7 @@ void flush_cache_page(struct vm_area_str pmd_t *pmd; pte_t *pte; pte_t entry; - unsigned long phys, addr, data; - unsigned long flags; + unsigned long phys; dir = pgd_offset(vma->vm_mm, address); pmd = pmd_offset(dir, address); @@ -366,49 +385,11 @@ void flush_cache_page(struct vm_area_str return; pte = pte_offset_kernel(pmd, address); entry = *pte; - if (pte_none(entry) || !pte_present(entry)) + if (!(pte_val(entry) & _PAGE_PRESENT)) return; phys = pte_val(entry)&PTE_PHYS_MASK; - - phys |= CACHE_VALID; - save_and_cli(flags); - jump_to_P2(); - - /* We only need to flush D-cache when we have alias */ - if ((address^phys) & CACHE_ALIAS) { - /* Loop 4K of the D-cache */ - for (addr 
= CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS); - addr < (CACHE_OC_ADDRESS_ARRAY + (address & CACHE_ALIAS) - +(CACHE_OC_NUM_ENTRIES/4<<CACHE_OC_ENTRY_SHIFT)); - addr += (1<<CACHE_OC_ENTRY_SHIFT)) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } - /* Loop another 4K of the D-cache */ - for (addr = CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS); - addr < (CACHE_OC_ADDRESS_ARRAY + (phys & CACHE_ALIAS) - +(CACHE_OC_NUM_ENTRIES/4<<CACHE_OC_ENTRY_SHIFT)); - addr += (1<<CACHE_OC_ENTRY_SHIFT)) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } - } - - if (vma->vm_flags & VM_EXEC) - /* Loop 4K of the I-cache */ - for (addr = CACHE_IC_ADDRESS_ARRAY|(address&0x1000); - addr < ((CACHE_IC_ADDRESS_ARRAY|(address&0x1000)) - +(CACHE_IC_NUM_ENTRIES/2<<CACHE_IC_ENTRY_SHIFT)); - addr += (1<<CACHE_IC_ENTRY_SHIFT)) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } - back_to_P1(); - restore_flags(flags); + __flush_cache_page(vma, address, phys); } /* @@ -421,10 +402,7 @@ void flush_cache_page(struct vm_area_str void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len) { - if (test_bit(PG_mapped, &page->flags)) { - __flush_icache_page(PHYSADDR(page_address(page))); - __flush_dcache_page(PHYSADDR(page_address(page))); - } + __flush_cache_page(vma, addr, PHYSADDR(page_address(page))); } /* Index: arch/sh/mm/clear_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/clear_page.S,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 clear_page.S --- arch/sh/mm/clear_page.S 15 Oct 2001 20:44:53 -0000 1.1.1.1 +++ arch/sh/mm/clear_page.S 27 Mar 2002 05:15:07 -0000 @@ -1,10 +1,11 @@ /* $Id: clear_page.S,v 1.1.1.1 2001/10/15 20:44:53 mrbrown Exp $ * - * clear_page implementation of SuperH + * __clear_user_page, __clear_user, clear_page 
implementation of SuperH * - * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima + * Copyright (C) 2001, 2002 Niibe Yutaka & Kaz Kojima * */ +#include <linux/linkage.h> /* * clear_page @@ -18,7 +19,6 @@ * r4 --- to * r5 --- to + 4096 */ -#include <linux/linkage.h> ENTRY(clear_page) mov r4,r5 mov.w .Llimit,r0 @@ -50,3 +50,160 @@ ENTRY(clear_page) rts nop .Llimit: .word (4096-28) + +ENTRY(__clear_user) + ! + mov #0, r0 + mov #0xe0, r1 ! 0xffffffe0 + ! + ! r4..r4&~32 -------- not aligned [ Area 0 ] + ! r4&~32..(r4+r5)&~32 -------- aligned [ Area 1 ] + ! (r4+r5)&~32..r4+r5 -------- not aligned [ Area 2 ] + ! + ! Clear area 0 + mov r4, r2 + and r1, r2 + cmp/eq r4, r2 + bt/s area1 + mov r4, r3 + sub r2, r3 + mov r4, r2 + ! +l0: dt r3 +0: mov.b r0, @r2 + bf/s l0 + add #1, r2 + ! + mov r4, r3 + add r5, r3 + and r1, r3 + ! + ! Clear area 1 +area1: +#if defined(__SH4__) +1: movca.l r0, @r2 +#else +1: mov.l r0, @r2 +#endif + add #4, r2 +2: mov.l r0, @r2 + add #4, r2 +3: mov.l r0, @r2 + add #4, r2 +4: mov.l r0, @r2 + add #4, r2 +5: mov.l r0, @r2 + add #4, r2 +6: mov.l r0, @r2 + add #4, r2 +7: mov.l r0, @r2 + add #4, r2 +8: mov.l r0, @r2 + add #4, r2 + cmp/hi r2, r3 + bt/s 1b + nop + ! + ! Clear area 2 + add r5, r4 + cmp/eq r4, r2 + bt/s done + sub r2, r4 +l2: dt r4 +9: mov.b r0, @r2 + bf/s l2 + add #1, r2 + ! +done: rts + nop ! return 0 as normal return + + ! 
return the number of bytes remained +bad_clear_user: + mov r4, r0 + mov r5, r0 + rts + sub r2, r0 + +.section __ex_table,"a" + .align 2 + .long 0b, bad_clear_user + .long 1b, bad_clear_user + .long 2b, bad_clear_user + .long 3b, bad_clear_user + .long 4b, bad_clear_user + .long 5b, bad_clear_user + .long 6b, bad_clear_user + .long 7b, bad_clear_user + .long 8b, bad_clear_user + .long 9b, bad_clear_user +.previous + +#if defined(__SH4__) +/* + * __clear_user_page + * @to: P1 address (with same color) + * @orig_to: P1 address + * + * void __clear_user_page(void *to, void *orig_to) + */ + +/* + * r0 --- scratch + * r4 --- to + * r5 --- orig_to + * r6 --- to + 4096 + */ +ENTRY(__clear_user_page) + mov.w .L4096,r0 + mov r4,r6 + add r0,r6 + mov #0,r0 + ! +1: ocbi @r5 + add #32,r5 + movca.l r0,@r4 + mov r4,r1 + add #32,r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + add #28,r4 + cmp/eq r6,r4 + bf/s 1b + ocbwb @r1 + ! + rts + nop +.L4096: .word 4096 + +/************* + unsigned long addr, data; + for (addr = start; addr < start + 4096; addr += 32) { + data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); + if (data == phys) + ctrl_outl(0, addr); + } +*************/ +ENTRY(__flush_cache_4096) + .rept 128 + mov.l @r4,r0 + and r6,r0 + cmp/eq r5,r0 + bf 1f + mov.l r7,@r4 +1: add #32,r4 + .endr + nop + nop + nop + nop + nop + nop + nop + rts + nop +#endif Index: arch/sh/mm/copy_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/copy_page.S,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 copy_page.S --- arch/sh/mm/copy_page.S 15 Oct 2001 20:44:53 -0000 1.1.1.1 +++ arch/sh/mm/copy_page.S 27 Mar 2002 05:15:07 -0000 @@ -1,10 +1,11 @@ /* $Id: copy_page.S,v 1.1.1.1 2001/10/15 20:44:53 mrbrown Exp $ * - * copy_page implementation of SuperH + * copy_page, __copy_user_page implementation of SuperH * * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima * */ 
+#include <linux/linkage.h> /* * copy_page @@ -21,7 +22,6 @@ * r10 --- to * r11 --- from */ -#include <linux/linkage.h> ENTRY(copy_page) mov.l r8,@-r15 mov.l r10,@-r15 @@ -66,4 +66,67 @@ ENTRY(copy_page) mov.l @r15+,r8 rts nop + +#if defined(__SH4__) +/* + * __copy_user_page + * @to: P1 address (with same color) + * @from: P1 address + * @orig_to: P1 address + * + * void __copy_user_page(void *to, void *from, void *orig_to) + */ + +/* + * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch + * r8 --- from + 4096 + * r9 --- orig_to + * r10 --- to + * r11 --- from + */ +#include <linux/linkage.h> +ENTRY(__copy_user_page) + mov.l r8,@-r15 + mov.l r9,@-r15 + mov.l r10,@-r15 + mov.l r11,@-r15 + mov r4,r10 + mov r5,r11 + mov r6,r9 + mov r5,r8 + mov.w .L4096,r0 + add r0,r8 + ! +1: ocbi @r9 + add #32,r9 + mov.l @r11+,r0 + mov.l @r11+,r1 + mov.l @r11+,r2 + mov.l @r11+,r3 + mov.l @r11+,r4 + mov.l @r11+,r5 + mov.l @r11+,r6 + mov.l @r11+,r7 + movca.l r0,@r10 + mov r10,r0 + add #32,r10 + mov.l r7,@-r10 + mov.l r6,@-r10 + mov.l r5,@-r10 + mov.l r4,@-r10 + mov.l r3,@-r10 + mov.l r2,@-r10 + mov.l r1,@-r10 + ocbwb @r0 + cmp/eq r11,r8 + bf/s 1b + add #28,r10 + ! + mov.l @r15+,r11 + mov.l @r15+,r10 + mov.l @r15+,r9 + mov.l @r15+,r8 + rts + nop +#endif .L4096: .word 4096 |
From: M. R. B. <mr...@0x...> - 2002-03-27 15:02:29
|
* NIIBE Yutaka <gn...@m1...> on Wed, Mar 27, 2002: > Optimized the flushing. > > There is a requirement in SH4 to control cache from P2 area. P2 area > is the area where cache is not used at all. This means, the cycles > for instruction could be quite large. In other word, if we could > save number of instructions, we see performance improvement. > > I've done: Four --> Three instruction fetch per single cache line flush. > Wow ... were you planning on merging these with linux-2_4-branch also? For this and your other changes, do you have numbers to show your improvements? Thanks, M. R. |
From: David M. <dav...@st...> - 2002-03-27 16:40:54
|
gn...@m1... wrote: > > Optimized the flushing. > > There is a requirement in SH4 to control cache from P2 area. P2 area > is the area where cache is not used at all. This means, the cycles > for instruction could be quite large. In other word, if we could > save number of instructions, we see performance improvement. > Niibe-san, One of the differences between the 7750 and 7751 (and ST40) processors is that the restriction to flush D-cache from P2 only has been lifted. This does provide a significant performance increase. We did spend a while trying to find out if this was a documentation change or a real change, but as far as we can tell it does appear to be a genuine change in the core. I ran a modified kernel and saw no problems not doing the change to P2. You have to read the manual really carefully to realise it has changed! -- Dave McKay Software Engineer STMicroelectronics Email: dav...@st... |
From: NIIBE Y. <gn...@m1...> - 2002-03-27 23:46:52
|
M. R. Brown wrote: > Wow ... were you planning on merging these with linux-2_4-branch also? Yes. Besides this, we have important bug fixes (FPU thing, syscall restart) and feature (kernel profiling support), which should be merged into 2.4. > For this and your other changes, do you have numbers to show your > improvements? We need more information, that's the reason I've implemented kernel profiling (at last ;-). I have only a rough one. I do: $ time for x in 0 1 2 3 4 5 6 7 8 9; do for y in 0 1 2 3 4 5 6 7 8 9; \ do sh -c "echo >/dev/null"; done; done to see how exec/exit goes. flush_cache_range was the bad guy which heavily calls flush_cache_4096 or __flush_cache_page. It took about 4sec user, 25sec system with old implementation, while 4sec user, 17sec system with new implementation. It is because of number of instructions spent for cache flushing. On SolutionEngine 7750. BTW, when we change flush_cache_range to just call flush_cache_all, it goes 4sec user and 4sec system. It seems to me that we have no way other than flush_cache_all... -- |
From: NIIBE Y. <gn...@m1...> - 2002-03-28 05:51:46
|
NIIBE Yutaka wrote: > Yes. Besides this, we have important bug fixes (FPU thing, syscall > restart) and feature (kernel profiling support), which should be merged > into 2.4. The backport. Here it is. 2002-03-28 NIIBE Yutaka <gn...@m1...> * include/asm-sh/uaccess.h (strnlen_user, strlen_user): New inline implementation. (__clear_user): Make it external function. * arch/sh/mm/copy_page.S (__copy_user_page): Moved from __copy_user_page-sh4.S. * arch/sh/mm/__copy_user_page-sh4.S: Removed. * arch/sh/mm/clear_page.S (__clear_user, __flush_cache_4096): New function. (__clear_user_page): Moved from __clear_user_page-sh4.S. * arch/sh/mm/__clear_user_page-sh4.S: Removed. * arch/sh/mm/cache-sh4.c (flush_cache_4096): New function. (flush_dcache_page): Removed __flush_dcache_page and merged. (__flush_icache_page, flush_cache_mm, flush_cache_range): Revert the change of 2002-02-27. (__flush_cache_page): New function. (flush_cache_page): Use __flush_cache_page. * arch/sh/kernel/setup.c (setup_arch): Bug fix for FPU initialization. * include/asm-sh/mmu_context.h (switch_mm): Remove setting/resetting of mm->cpu_vm_mask. It's for SMP implementation. (get_new_mmu_context): Removed. (get_mmu_context): Merged with get_new_mmu_context. * include/asm-sh/hw_irq.h (sh_do_profile): Removed from here. * arch/sh/kernel/time.c (sh_do_profile): But implemented here. * include/asm-sh/ptrace.h (struct pt_regs): Renamed syscall_nr to tra. * arch/sh/kernel/process.c (dump_fpu, __switch_to, copy_thread): Don't need to protect from interrupt. (__switch_to, copy_thread): Don't check if it's init_task or not. * arch/sh/kernel/signal.c (do_signal): Don't set regs[0]. Use tra (was: syscall_nr). (restore_sigcontext): Use tra. (handle_signal): Likewise. (save_sigcontext_fpu): Use __put_user (was: __copy_to_user). Don't need to protect from interrupt. * arch/sh/kernel/entry.S (COMPAT_OLD_SYSCALL_ABI): Removed. (old_abi_system_call): Removed. (OFF_TRA): Renamed from SYSCALL_NR. 
(system_call): Use OFF_TRA and the value is now tra (was: encoded value). Index: arch/sh/kernel/entry.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/entry.S,v retrieving revision 1.1.1.1.2.2 diff -u -3 -p -r1.1.1.1.2.2 entry.S --- arch/sh/kernel/entry.S 26 Feb 2002 11:42:29 -0000 1.1.1.1.2.2 +++ arch/sh/kernel/entry.S 28 Mar 2002 05:49:28 -0000 @@ -15,12 +15,6 @@ #include <linux/config.h> -/* - * Define this to turn on compatibility with the previous - * system call ABI. This feature is not properly maintained. - */ -#undef COMPAT_OLD_SYSCALL_ABI - ! NOTE: ! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address ! to be jumped is too far, but it causes illegal slot exception. @@ -97,7 +91,7 @@ OFF_R6 = 24 /* New ABI: ar OFF_R7 = 28 /* New ABI: arg3 */ OFF_SP = (15*4) OFF_SR = (16*4+8) -SYSCALL_NR = (16*4+6*4) +OFF_TRA = (16*4+6*4) #define k0 r0 @@ -345,29 +339,15 @@ system_call: mov.l @r9, r8 ! ! Is the trap argument >= 0x20? (TRA will be >= 0x80) - mov #0x20, r9 - extu.b r9, r9 - shll2 r9 - cmp/hs r9, r8 + mov #0x7f, r9 + cmp/hi r9, r8 bt debug_trap ! - mov #SYSCALL_NR, r14 + mov #OFF_TRA, r14 add r15, r14 ! -#ifdef COMPAT_OLD_SYSCALL_ABI - mov #0x40, r9 - cmp/hs r9, r8 - bf/s old_abi_system_call - nop -#endif ! New Syscall ABI - add #-0x40, r8 - shlr2 r8 - shll8 r8 - shll8 r8 ! r8 = num_args<<16 - mov r3, r10 - or r8, r10 ! Encode syscall # and # of arguments - mov.l r10, @r14 ! set syscall_nr + mov.l r8, @r14 ! set tra STI() ! stc k_current, r11 @@ -426,74 +406,6 @@ syscall_ret_trace: jmp @r1 ! Call syscall_trace() which notifies superior lds r0, pr ! Then return to ret_from_syscall() - - -#ifdef COMPAT_OLD_SYSCALL_ABI -! Handle old ABI system call. -! Note that ptrace(SYSCALL) is not supported for the old ABI. -! At this point: -! r0, r4-7 as per ABI -! r8 = value of TRA register (= num_args<<2) -! r14 = points to SYSCALL_NR in stack frame -old_abi_system_call: - mov r0, r9 ! 
Save system call number in r9 - ! ! arrange for return which pops stack - mov.l __old_abi_syscall_ret, r10 - lds r10, pr - ! Build the stack frame if TRA > 0 - mov r8, r10 - cmp/pl r10 - bf 0f - mov.l @(OFF_SP,r15), r0 ! get original user stack -7: add #-4, r10 -4: mov.l @(r0,r10), r1 ! May cause address error exception.. - mov.l r1, @-r15 - cmp/pl r10 - bt 7b -0: - mov.l r9, @r14 ! set syscall_nr - STI() - ! Call the system call handler through the table. - ! First check for bad syscall number - mov.l __n_sys, r10 - cmp/hs r10, r9 - bf 2f - ! Bad syscall number - rts ! return to old_abi_syscall_ret - mov #-ENOSYS, r0 - ! Good syscall number -2: shll2 r9 ! x4 - mov.l __sct, r11 - add r11, r9 - mov.l @r9, r11 - jmp @r11 ! call specific syscall handler, - nop - - .align 2 -__old_abi_syscall_ret: - .long old_abi_syscall_ret - - ! This code gets called on address error exception when copying - ! syscall arguments from user stack to kernel stack. It is - ! supposed to return -EINVAL through old_abi_syscall_ret, but it - ! appears to have been broken for a long time in that the r0 - ! return value will be saved into the kernel stack relative to r15 - ! but the value of r15 is not correct partway through the loop. - ! So the user prog is returned its old r0 value, not -EINVAL. - ! Greg Banks 28 Aug 2000. - .section .fixup,"ax" -fixup_syscall_argerr: - ! First get r15 back to - rts - mov #-EINVAL, r0 - .previous - - .section __ex_table, "a" - .align 2 - .long 4b,fixup_syscall_argerr - .previous -#endif - .align 2 __TRA: .long TRA __syscall_trace: @@ -529,11 +441,6 @@ ret_from_exception: nop .align 2 -#ifdef COMPAT_OLD_SYSCALL_ABI -old_abi_syscall_ret: - add r8, r15 ! pop off the arguments - /* fall through */ -#endif syscall_ret: mov.l r0, @(OFF_R0,r15) ! save the return value /* fall through */ @@ -685,7 +592,7 @@ handle_exception: 9: mov #-1, k4 mov.l 3f, k1 ! Save the user registers on the stack. - mov.l k4, @-r15 ! syscall_nr (default: -1) + mov.l k4, @-r15 ! 
Set tra (default: -1) ! sts.l macl, @-r15 sts.l mach, @-r15 Index: arch/sh/kernel/process.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/process.c,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 process.c --- arch/sh/kernel/process.c 15 Oct 2001 20:44:51 -0000 1.1.1.1 +++ arch/sh/kernel/process.c 28 Mar 2002 05:49:28 -0000 @@ -179,11 +179,7 @@ int dump_fpu(struct pt_regs *regs, elf_f fpvalid = tsk->used_math; if (fpvalid) { - unsigned long flags; - - save_and_cli(flags); unlazy_fpu(tsk); - restore_flags(flags); memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu)); } @@ -203,15 +199,9 @@ int copy_thread(int nr, unsigned long cl #if defined(__SH4__) struct task_struct *tsk = current; - if (tsk != &init_task) { - unsigned long flags; - - save_and_cli(flags); - unlazy_fpu(tsk); - restore_flags(flags); - p->thread.fpu = current->thread.fpu; - p->used_math = tsk->used_math; - } + unlazy_fpu(tsk); + p->thread.fpu = current->thread.fpu; + p->used_math = tsk->used_math; #endif childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long) p)) - 1; *childregs = *regs; @@ -257,13 +247,7 @@ void dump_thread(struct pt_regs * regs, void __switch_to(struct task_struct *prev, struct task_struct *next) { #if defined(__SH4__) - if (prev != &init_task) { - unsigned long flags; - - save_and_cli(flags); - unlazy_fpu(prev); - restore_flags(flags); - } + unlazy_fpu(prev); #endif /* * Restore the kernel mode register Index: arch/sh/kernel/setup.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/setup.c,v retrieving revision 1.1.1.1.2.1 diff -u -3 -p -r1.1.1.1.2.1 setup.c --- arch/sh/kernel/setup.c 30 Nov 2001 23:03:33 -0000 1.1.1.1.2.1 +++ arch/sh/kernel/setup.c 28 Mar 2002 05:49:28 -0000 @@ -485,9 +485,8 @@ void __init setup_arch(char **cmdline_p) } #if defined(__SH4__) - /* We already grab/initialized FPU in head.S. Make it consisitent. 
*/ - init_task.used_math = 1; - init_task.flags |= PF_USEDFPU; + init_task.used_math = 0; + init_task.flags &= ~PF_USEDFPU; #endif paging_init(); } Index: arch/sh/kernel/signal.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/signal.c,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 signal.c --- arch/sh/kernel/signal.c 15 Oct 2001 20:44:52 -0000 1.1.1.1 +++ arch/sh/kernel/signal.c 28 Mar 2002 05:49:28 -0000 @@ -196,27 +196,20 @@ static inline int restore_sigcontext_fpu static inline int save_sigcontext_fpu(struct sigcontext *sc) { struct task_struct *tsk = current; - unsigned long flags; - int val; if (!tsk->used_math) { - val = 0; - __copy_to_user(&sc->sc_ownedfp, &val, sizeof(int)); + __put_user(0, &sc->sc_ownedfp); return 0; } - val = 1; - __copy_to_user(&sc->sc_ownedfp, &val, sizeof(int)); + __put_user(1, &sc->sc_ownedfp); /* This will cause a "finit" to be triggered by the next attempted FPU operation by the 'current' process. */ tsk->used_math = 0; - save_and_cli(flags); unlazy_fpu(tsk); - restore_flags(flags); - return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.fpu.hard, sizeof(long)*(16*2+2)); } @@ -255,7 +248,7 @@ restore_sigcontext(struct pt_regs *regs, } #endif - regs->syscall_nr = -1; /* disable syscall checks */ + regs->tra = -1; /* disable syscall checks */ err |= __get_user(*r0_p, &sc->sc_regs[0]); return err; } @@ -524,7 +517,7 @@ handle_signal(unsigned long sig, struct siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) { /* Are we from a system call? */ - if (regs->syscall_nr >= 0) { + if (regs->tra >= 0) { /* If so, check system call restarting.. */ switch (regs->regs[0]) { case -ERESTARTNOHAND: @@ -538,7 +531,6 @@ handle_signal(unsigned long sig, struct } /* fallthrough */ case -ERESTARTNOINTR: - regs->regs[0] = regs->syscall_nr; regs->pc -= 2; } } @@ -685,12 +677,11 @@ int do_signal(struct pt_regs *regs, sigs } /* Did we come from a system call? 
*/ - if (regs->syscall_nr >= 0) { + if (regs->tra >= 0) { /* Restart the system call - no handlers present */ if (regs->regs[0] == -ERESTARTNOHAND || regs->regs[0] == -ERESTARTSYS || regs->regs[0] == -ERESTARTNOINTR) { - regs->regs[0] = regs->syscall_nr; regs->pc -= 2; } } Index: arch/sh/kernel/time.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/time.c,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 time.c --- arch/sh/kernel/time.c 15 Oct 2001 20:44:50 -0000 1.1.1.1 +++ arch/sh/kernel/time.c 28 Mar 2002 05:49:28 -0000 @@ -176,6 +176,25 @@ void do_settimeofday(struct timeval *tv) /* last time the RTC clock got updated */ static long last_rtc_update; +static __inline__ void sh_do_profile (unsigned long pc) +{ + extern int _stext; + + if (!prof_buffer) + return; + + pc -= (unsigned long) &_stext; + pc >>= prof_shift; + /* + * Don't ignore out-of-bounds PC values silently, + * put them into the last histogram slot, so if + * present, they will show up as a sharp peak. 
+ */ + if (pc > prof_len-1) + pc = prof_len-1; + prof_buffer[pc]++; +} + /* * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick Index: arch/sh/mm/Makefile =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/Makefile,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 Makefile --- arch/sh/mm/Makefile 15 Oct 2001 20:44:53 -0000 1.1.1.1 +++ arch/sh/mm/Makefile 28 Mar 2002 05:49:28 -0000 @@ -11,7 +11,7 @@ O_TARGET := mm.o obj-y := init.o fault.o extable.o clear_page.o copy_page.o obj-$(CONFIG_CPU_SH3) += cache-sh3.o -obj-$(CONFIG_CPU_SH4) += cache-sh4.o __clear_user_page-sh4.o __copy_user_page-sh4.o ioremap.o +obj-$(CONFIG_CPU_SH4) += cache-sh4.o ioremap.o USE_STANDARD_AS_RULE := true Index: arch/sh/mm/cache-sh4.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/cache-sh4.c,v retrieving revision 1.1.1.1.2.3 diff -u -3 -p -r1.1.1.1.2.3 cache-sh4.c --- arch/sh/mm/cache-sh4.c 22 Mar 2002 14:02:09 -0000 1.1.1.1.2.3 +++ arch/sh/mm/cache-sh4.c 28 Mar 2002 05:49:28 -0000 @@ -1,9 +1,8 @@ /* $Id: cache-sh4.c,v 1.1.1.1.2.3 2002/03/22 14:02:09 gniibe Exp $ * - * linux/arch/sh/mm/cache.c + * linux/arch/sh/mm/cache-sh4.c * - * Copyright (C) 1999, 2000 Niibe Yutaka - * Copyright (C) 2001, 2002 Paul Mundt + * Copyright (C) 1999, 2000, 2002 Niibe Yutaka */ #include <linux/config.h> @@ -208,55 +207,34 @@ void flush_cache_sigtramp(unsigned long restore_flags(flags); } -/* - * Writeback&Invalidate the D-cache of the page - */ -static void __flush_dcache_page(unsigned long phys) -{ - unsigned long addr, data; - unsigned long flags; - - phys |= CACHE_VALID; - - save_and_cli(flags); - jump_to_P2(); - - /* Loop all the D-cache */ - for (addr = CACHE_OC_ADDRESS_ARRAY; - addr < (CACHE_OC_ADDRESS_ARRAY - +(CACHE_OC_NUM_ENTRIES<< CACHE_OC_ENTRY_SHIFT)); - addr += (1<<CACHE_OC_ENTRY_SHIFT)) { - data = 
ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } - - back_to_P1(); - restore_flags(flags); -} - -static void __flush_icache_page(unsigned long phys) +static inline void flush_cache_4096(unsigned long start, + unsigned long phys) { +#if defined(CONFIG_CPU_SUBTYPE_SH7750) + register unsigned long addr __asm__ ("r4"); + register unsigned long data __asm__ ("r0"); + register unsigned long __r5 __asm__ ("r5") = phys; + register unsigned long __r6 __asm__ ("r6") = (0x1ffff000|CACHE_VALID); + register unsigned long __r7 __asm__ ("r7") = 0; + extern void __flush_cache_4096(unsigned long, unsigned long); + + asm volatile("jsr @%1; nop" + : "=r" (addr), "=r" (data) + : "0" (start), "1" (__flush_cache_4096 + 0x20000000), + "r" (__r5), "r" (__r6), "r" (__r7) + : "pr"); +#else + /* + * SH7751 and ST40 have no restriction to handle cache. + * (While SH7750 must do that at P2 area.) + */ unsigned long addr, data; - unsigned long flags; - - phys |= CACHE_VALID; - - save_and_cli(flags); - jump_to_P2(); - - /* Loop all the I-cache */ - for (addr = CACHE_IC_ADDRESS_ARRAY; - addr < (CACHE_IC_ADDRESS_ARRAY - +(CACHE_IC_NUM_ENTRIES<< CACHE_IC_ENTRY_SHIFT)); - addr += (1<<CACHE_IC_ENTRY_SHIFT)) { + for (addr = start; addr < start + 4096; addr += 32) { data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); if (data == phys) ctrl_outl(0, addr); } - - back_to_P1(); - restore_flags(flags); +#endif } /* @@ -265,8 +243,22 @@ static void __flush_icache_page(unsigned */ void flush_dcache_page(struct page *page) { - if (test_bit(PG_mapped, &page->flags)) - __flush_dcache_page(PHYSADDR(page_address(page))); + if (test_bit(PG_mapped, &page->flags)) { + unsigned long phys = PHYSADDR(page_address(page)); + unsigned long flags; + + phys |= CACHE_VALID; + + save_and_cli(flags); + + /* Loop all the D-cache */ + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY, phys); + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys); + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, 
phys); + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys); + + restore_flags(flags); + } } void flush_cache_all(void) @@ -298,15 +290,43 @@ void flush_cache_mm(struct mm_struct *mm * FIXME: Really, the optimal solution here would be able to flush out * individual lines created by the specified context, but this isn't * feasible for a number of architectures (such as MIPS, and some - * SPARC) .. is this possible for SuperH? (This is a non-issue if the - * SH4 cache is configured in write-through mode). + * SPARC) .. is this possible for SuperH? * - * In the meantime, we'll just flush all of the caches if we have a - * valid mm context.. this seems to be the simplest way to avoid at - * least a few wasted cache flushes. -Lethal + * In the meantime, we'll just flush all of the caches.. this + * seems to be the simplest way to avoid at least a few wasted + * cache flushes. -Lethal */ - if (mm->context != 0) - flush_cache_all(); + flush_cache_all(); +} + +static void __flush_cache_page(struct vm_area_struct *vma, + unsigned long address, + unsigned long phys) +{ + unsigned long flags; + + phys |= CACHE_VALID; + save_and_cli(flags); + + /* We only need to flush D-cache when we have alias */ + if ((address^phys) & CACHE_ALIAS) { + /* Loop 4K of the D-cache */ + flush_cache_4096( + CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS), + phys); + /* Loop another 4K of the D-cache */ + flush_cache_4096( + CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS), + phys); + } + + if (vma->vm_flags & VM_EXEC) + /* Loop 4K (half) of the I-cache */ + flush_cache_4096( + CACHE_IC_ADDRESS_ARRAY | (address & 0x1000), + phys); + + restore_flags(flags); } /* @@ -321,39 +341,15 @@ void flush_cache_mm(struct mm_struct *mm void flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end) { - unsigned long flags; - - if (mm->context == 0) - return; - - start &= PAGE_MASK; - - if (!find_vma(mm, start)) - return; - if (mm->context != current->active_mm->context) { - 
flush_cache_all(); - } else { - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - - save_and_cli(flags); - jump_to_P2(); - - for (start; start < end; start += PAGE_SIZE) { - pgd = pgd_offset(mm, start); - pmd = pmd_offset(pgd, start); - pte = pte_offset(pmd, start); - - if (pte_val(*pte) & _PAGE_PRESENT) { - __flush_icache_page(start); - __flush_dcache_page(start); - } - } - - back_to_P1(); - restore_flags(flags); - } + /* + * We could call flush_cache_page for the pages of these + * range, but it's really time consuming (we have to scan the + * caches all the time...). + * + * We can't use A-bit magic, as there's the case we don't have + * valid entry on TLB. + */ + flush_cache_all(); } /* @@ -367,8 +363,7 @@ void flush_cache_page(struct vm_area_str pmd_t *pmd; pte_t *pte; pte_t entry; - unsigned long phys, addr, data; - unsigned long flags; + unsigned long phys; dir = pgd_offset(vma->vm_mm, address); pmd = pmd_offset(dir, address); @@ -376,49 +371,11 @@ void flush_cache_page(struct vm_area_str return; pte = pte_offset(pmd, address); entry = *pte; - if (pte_none(entry) || !pte_present(entry)) + if (!(pte_val(entry) & _PAGE_PRESENT)) return; phys = pte_val(entry)&PTE_PHYS_MASK; - - phys |= CACHE_VALID; - save_and_cli(flags); - jump_to_P2(); - - /* We only need to flush D-cache when we have alias */ - if ((address^phys) & CACHE_ALIAS) { - /* Loop 4K of the D-cache */ - for (addr = CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS); - addr < (CACHE_OC_ADDRESS_ARRAY + (address & CACHE_ALIAS) - +(CACHE_OC_NUM_ENTRIES/4<<CACHE_OC_ENTRY_SHIFT)); - addr += (1<<CACHE_OC_ENTRY_SHIFT)) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } - /* Loop another 4K of the D-cache */ - for (addr = CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS); - addr < (CACHE_OC_ADDRESS_ARRAY + (phys & CACHE_ALIAS) - +(CACHE_OC_NUM_ENTRIES/4<<CACHE_OC_ENTRY_SHIFT)); - addr += (1<<CACHE_OC_ENTRY_SHIFT)) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data 
== phys) - ctrl_outl(0, addr); - } - } - - if (vma->vm_flags & VM_EXEC) - /* Loop 4K of the I-cache */ - for (addr = CACHE_IC_ADDRESS_ARRAY|(address&0x1000); - addr < ((CACHE_IC_ADDRESS_ARRAY|(address&0x1000)) - +(CACHE_IC_NUM_ENTRIES/2<<CACHE_IC_ENTRY_SHIFT)); - addr += (1<<CACHE_IC_ENTRY_SHIFT)) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } - back_to_P1(); - restore_flags(flags); + __flush_cache_page(vma, address, phys); } /* Index: arch/sh/mm/clear_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/clear_page.S,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 clear_page.S --- arch/sh/mm/clear_page.S 15 Oct 2001 20:44:53 -0000 1.1.1.1 +++ arch/sh/mm/clear_page.S 28 Mar 2002 05:49:28 -0000 @@ -1,10 +1,13 @@ /* $Id: clear_page.S,v 1.1.1.1 2001/10/15 20:44:53 mrbrown Exp $ * - * clear_page implementation of SuperH + * __clear_user_page, __clear_user, clear_page implementation of SuperH * - * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima + * Copyright (C) 2001 Kaz Kojima + * Copyright (C) 2001, 2002 Niibe Yutaka * */ +#include <linux/config.h> +#include <linux/linkage.h> /* * clear_page @@ -18,7 +21,6 @@ * r4 --- to * r5 --- to + 4096 */ -#include <linux/linkage.h> ENTRY(clear_page) mov r4,r5 mov.w .Llimit,r0 @@ -50,3 +52,154 @@ ENTRY(clear_page) rts nop .Llimit: .word (4096-28) + +ENTRY(__clear_user) + ! + mov #0, r0 + mov #0xe0, r1 ! 0xffffffe0 + ! + ! r4..r4&~32 -------- not aligned [ Area 0 ] + ! r4&~32..(r4+r5)&~32 -------- aligned [ Area 1 ] + ! (r4+r5)&~32..r4+r5 -------- not aligned [ Area 2 ] + ! + ! Clear area 0 + mov r4, r2 + and r1, r2 + cmp/eq r4, r2 + bt/s area1 + mov r4, r3 + sub r2, r3 + mov r4, r2 + ! +l0: dt r3 +0: mov.b r0, @r2 + bf/s l0 + add #1, r2 + ! + mov r4, r3 + add r5, r3 + and r1, r3 + ! + ! 
Clear area 1 +area1: +#if defined(__SH4__) +1: movca.l r0, @r2 +#else +1: mov.l r0, @r2 +#endif + add #4, r2 +2: mov.l r0, @r2 + add #4, r2 +3: mov.l r0, @r2 + add #4, r2 +4: mov.l r0, @r2 + add #4, r2 +5: mov.l r0, @r2 + add #4, r2 +6: mov.l r0, @r2 + add #4, r2 +7: mov.l r0, @r2 + add #4, r2 +8: mov.l r0, @r2 + add #4, r2 + cmp/hi r2, r3 + bt/s 1b + nop + ! + ! Clear area 2 + add r5, r4 + cmp/eq r4, r2 + bt/s done + sub r2, r4 +l2: dt r4 +9: mov.b r0, @r2 + bf/s l2 + add #1, r2 + ! +done: rts + nop ! return 0 as normal return + + ! return the number of bytes remained +bad_clear_user: + mov r4, r0 + mov r5, r0 + rts + sub r2, r0 + +.section __ex_table,"a" + .align 2 + .long 0b, bad_clear_user + .long 1b, bad_clear_user + .long 2b, bad_clear_user + .long 3b, bad_clear_user + .long 4b, bad_clear_user + .long 5b, bad_clear_user + .long 6b, bad_clear_user + .long 7b, bad_clear_user + .long 8b, bad_clear_user + .long 9b, bad_clear_user +.previous + +#if defined(__SH4__) +/* + * __clear_user_page + * @to: P1 address (with same color) + * @orig_to: P1 address + * + * void __clear_user_page(void *to, void *orig_to) + */ + +/* + * r0 --- scratch + * r4 --- to + * r5 --- orig_to + * r6 --- to + 4096 + */ +ENTRY(__clear_user_page) + mov.w .L4096,r0 + mov r4,r6 + add r0,r6 + mov #0,r0 + ! +1: ocbi @r5 + add #32,r5 + movca.l r0,@r4 + mov r4,r1 + add #32,r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + mov.l r0,@-r4 + add #28,r4 + cmp/eq r6,r4 + bf/s 1b + ocbwb @r1 + ! 
+ rts + nop +.L4096: .word 4096 + +#if defined(CONFIG_CPU_SUBTYPE_SH7750) +ENTRY(__flush_cache_4096) + .rept 128 + mov.l @r4,r0 + and r6,r0 + cmp/eq r5,r0 + bf 1f + mov.l r7,@r4 +1: add #32,r4 + .endr + nop + nop + nop + nop + nop + nop + nop + rts + nop +#endif +#endif Index: arch/sh/mm/copy_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/copy_page.S,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 copy_page.S --- arch/sh/mm/copy_page.S 15 Oct 2001 20:44:53 -0000 1.1.1.1 +++ arch/sh/mm/copy_page.S 28 Mar 2002 05:49:28 -0000 @@ -1,10 +1,11 @@ /* $Id: copy_page.S,v 1.1.1.1 2001/10/15 20:44:53 mrbrown Exp $ * - * copy_page implementation of SuperH + * copy_page, __copy_user_page implementation of SuperH * * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima * */ +#include <linux/linkage.h> /* * copy_page @@ -21,7 +22,6 @@ * r10 --- to * r11 --- from */ -#include <linux/linkage.h> ENTRY(copy_page) mov.l r8,@-r15 mov.l r10,@-r15 @@ -66,4 +66,67 @@ ENTRY(copy_page) mov.l @r15+,r8 rts nop + +#if defined(__SH4__) +/* + * __copy_user_page + * @to: P1 address (with same color) + * @from: P1 address + * @orig_to: P1 address + * + * void __copy_user_page(void *to, void *from, void *orig_to) + */ + +/* + * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch + * r8 --- from + 4096 + * r9 --- orig_to + * r10 --- to + * r11 --- from + */ +#include <linux/linkage.h> +ENTRY(__copy_user_page) + mov.l r8,@-r15 + mov.l r9,@-r15 + mov.l r10,@-r15 + mov.l r11,@-r15 + mov r4,r10 + mov r5,r11 + mov r6,r9 + mov r5,r8 + mov.w .L4096,r0 + add r0,r8 + ! +1: ocbi @r9 + add #32,r9 + mov.l @r11+,r0 + mov.l @r11+,r1 + mov.l @r11+,r2 + mov.l @r11+,r3 + mov.l @r11+,r4 + mov.l @r11+,r5 + mov.l @r11+,r6 + mov.l @r11+,r7 + movca.l r0,@r10 + mov r10,r0 + add #32,r10 + mov.l r7,@-r10 + mov.l r6,@-r10 + mov.l r5,@-r10 + mov.l r4,@-r10 + mov.l r3,@-r10 + mov.l r2,@-r10 + mov.l r1,@-r10 + ocbwb @r0 + cmp/eq r11,r8 + bf/s 1b + add #28,r10 + ! 
+ mov.l @r15+,r11 + mov.l @r15+,r10 + mov.l @r15+,r9 + mov.l @r15+,r8 + rts + nop +#endif .L4096: .word 4096 Index: include/asm-sh/hw_irq.h =================================================================== RCS file: /cvsroot/linuxsh/linux/include/asm-sh/hw_irq.h,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 hw_irq.h --- include/asm-sh/hw_irq.h 15 Oct 2001 20:45:08 -0000 1.1.1.1 +++ include/asm-sh/hw_irq.h 28 Mar 2002 05:49:28 -0000 @@ -1,6 +1,4 @@ #ifndef __ASM_SH_HW_IRQ_H #define __ASM_SH_HW_IRQ_H -static __inline__ void sh_do_profile (unsigned long pc) {/*Not implemented yet*/} - static __inline__ void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { /* Nothing to do */ } #endif /* __ASM_SH_HW_IRQ_H */ Index: include/asm-sh/mmu_context.h =================================================================== RCS file: /cvsroot/linuxsh/linux/include/asm-sh/mmu_context.h,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 mmu_context.h --- include/asm-sh/mmu_context.h 15 Oct 2001 20:45:10 -0000 1.1.1.1 +++ include/asm-sh/mmu_context.h 28 Mar 2002 05:49:28 -0000 @@ -29,19 +29,32 @@ extern unsigned long mmu_context_cache; */ #define MMU_VPN_MASK 0xfffff000 +/* + * Get MMU context if needed. + */ static __inline__ void -get_new_mmu_context(struct mm_struct *mm) +get_mmu_context(struct mm_struct *mm) { extern void flush_tlb_all(void); + unsigned long mc = mmu_context_cache; - unsigned long mc = ++mmu_context_cache; + /* Check if we have old version of context. */ + if (((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) == 0) + /* It's up to date, do nothing */ + return; + /* It's old, we need to get new context with new version. */ + mc = ++mmu_context_cache; if (!(mc & MMU_CONTEXT_ASID_MASK)) { - /* We exhaust ASID of this version. - Flush all TLB and start new cycle. */ + /* + * We exhaust ASID of this version. + * Flush all TLB and start new cycle. + */ flush_tlb_all(); - /* Fix version if needed. 
- Note that we avoid version #0 to distingush NO_CONTEXT. */ + /* + * Fix version; Note that we avoid version #0 + * to distingush NO_CONTEXT. + */ if (!mc) mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION; } @@ -49,21 +62,6 @@ get_new_mmu_context(struct mm_struct *mm } /* - * Get MMU context if needed. - */ -static __inline__ void -get_mmu_context(struct mm_struct *mm) -{ - if (mm) { - unsigned long mc = mmu_context_cache; - /* Check if we have old version of context. - If it's old, we need to get new context with new version. */ - if ((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) - get_new_mmu_context(mm); - } -} - -/* * Initialize the context related info for a new mm_struct * instance. */ @@ -169,8 +167,6 @@ static __inline__ void switch_mm(struct if (prev != next) { unsigned long __pgdir = (unsigned long)next->pgd; - clear_bit(cpu, &prev->cpu_vm_mask); - set_bit(cpu, &next->cpu_vm_mask); __asm__ __volatile__("mov.l %0, %1" : /* no output */ : "r" (__pgdir), "m" (__m(MMU_TTB))); @@ -185,4 +181,5 @@ static __inline__ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) { } + #endif /* __ASM_SH_MMU_CONTEXT_H */ Index: include/asm-sh/ptrace.h =================================================================== RCS file: /cvsroot/linuxsh/linux/include/asm-sh/ptrace.h,v retrieving revision 1.1.1.1 diff -u -3 -p -r1.1.1.1 ptrace.h --- include/asm-sh/ptrace.h 15 Oct 2001 20:45:11 -0000 1.1.1.1 +++ include/asm-sh/ptrace.h 28 Mar 2002 05:49:28 -0000 @@ -61,7 +61,7 @@ struct pt_regs { unsigned long gbr; unsigned long mach; unsigned long macl; - long syscall_nr; + long tra; }; #ifdef __KERNEL__ Index: include/asm-sh/uaccess.h =================================================================== RCS file: /cvsroot/linuxsh/linux/include/asm-sh/uaccess.h,v retrieving revision 1.1.1.1.2.1 diff -u -3 -p -r1.1.1.1.2.1 uaccess.h --- include/asm-sh/uaccess.h 3 Nov 2001 00:52:47 -0000 1.1.1.1.2.1 +++ include/asm-sh/uaccess.h 28 Mar 2002 05:49:28 -0000 
@@ -308,39 +308,11 @@ __copy_res; }) __copy_user((void *)(to), \ (void *)(from), n) -/* XXX: Not sure it works well.. - should be such that: 4byte clear and the rest. */ -static __inline__ __kernel_size_t -__clear_user(void *addr, __kernel_size_t size) -{ - unsigned long __a; - - __asm__ __volatile__( - "9:\n\t" - "dt %0\n" - "1:\n\t" - "mov.b %4, @%1\n\t" - "bf/s 9b\n\t" - " add #1, %1\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3:\n\t" - "mov.l 4f, %1\n\t" - "jmp @%1\n\t" - " nop\n" - ".balign 4\n" - "4: .long 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .balign 4\n" - " .long 1b,3b\n" - ".previous" - : "=r" (size), "=r" (__a) - : "0" (size), "1" (addr), "r" (0) - : "memory", "t"); - - return size; -} +/* + * Clear the area and return remaining number of bytes + * (on failure. Usually it's 0.) + */ +extern __kernel_size_t __clear_user(void *addr, __kernel_size_t size); #define clear_user(addr,n) ({ \ void * __cl_addr = (addr); \ @@ -396,8 +368,6 @@ if(__access_ok(__sfu_src, __sfu_count)) __sfu_res = __strncpy_from_user((unsigned long) (dest), __sfu_src, __sfu_count); \ } __sfu_res; }) -#define strlen_user(str) strnlen_user(str, ~0UL >> 1) - /* * Return the size of a string (including the ending 0!) */ @@ -436,10 +406,18 @@ static __inline__ long __strnlen_user(co static __inline__ long strnlen_user(const char *s, long n) { - if (!__addr_ok(s)) + if (!access_ok(VERIFY_READ, s, n)) return 0; else return __strnlen_user(s, n); +} + +static __inline__ long strlen_user(const char *s) +{ + if (!access_ok(VERIFY_READ, s, 0)) + return 0; + else + return __strnlen_user(s, ~0UL >> 1); } struct exception_table_entry |
From: Dustin M. <du...@se...> - 2002-03-29 18:44:15
|
I've just tried Niibe-san's backport of the cache changes for the 2.4 kernel. I'm running on an SH7751-based system, and I've run into some trouble. It seems I get a kernel oops (Segmentation fault) in __flush_cache_page when using the /bin/ps binary. The oops goes away if I simply force the CONFIG_CPU_SUBTYPE_SH7750 defines to true and use the 7750 cache method. I'm still working on getting a better idea of what is causing the oops. Dustin. > -----Original Message----- > From: lin...@li... > [mailto:lin...@li...]On Behalf Of NIIBE > Yutaka > Sent: Wednesday, March 27, 2002 9:52 PM > To: lin...@li... > Subject: Re: [linuxsh-dev] cache-sh4.c > > > NIIBE Yutaka wrote: > > Yes. Besides this, we have important bug fixes (FPU thing, syscall > > restart) and feature (kernel profiling support), which should be merged > > into 2.4. > > The backport. > > Here it is. > > 2002-03-28 NIIBE Yutaka <gn...@m1...> > > * include/asm-sh/uaccess.h (strnlen_user, strlen_user): New inline > implementation. > (__clear_user): Make it external function. > > * arch/sh/mm/copy_page.S (__copy_user_page): Moved from > __copy_user_page-sh4.S. > * arch/sh/mm/__copy_user_page-sh4.S: Removed. > > * arch/sh/mm/clear_page.S (__clear_user, __flush_cache_4096): New > function. > (__clear_user_page): Moved from __clear_user_page-sh4.S. > * arch/sh/mm/__clear_user_page-sh4.S: Removed. > > * arch/sh/mm/cache-sh4.c (flush_cache_4096): New function. > (flush_dcache_page): Removed __flush_dcache_page and merged. > (__flush_icache_page, flush_cache_mm, flush_cache_range): Revert the > change of 2002-02-27. > (__flush_cache_page): New function. > (flush_cache_page): Use __flush_cache_page. > > * arch/sh/kernel/setup.c (setup_arch): Bug fix for FPU > initialization. > > * include/asm-sh/mmu_context.h (switch_mm): Remove setting/resetting > of mm->cpu_vm_mask. It's for SMP implementation. > (get_new_mmu_context): Removed. > (get_mmu_context): Merved with get_new_mmu_context. 
> > * include/asm-sh/hw_irq.h (sh_do_profile): Removed from here. > * arch/sh/kernel/time.c (sh_do_profile): But implemented here. > > * include/asm-sh/ptrace.h (struct pt_regs): Renamed syscall_nr to > tra. > > * arch/sh/kernel/process.c (dump_fpu, __switch_to, copy_thread): > Don't need to protect from interrupt. > (__switch_to, copy_thread): Don't check if it's init_task or not. > > * arch/sh/kernel/signal.c (do_signal): Don't set regs[0]. > Use tra (was: syscall_nr). > (restore_sigcontext): Use tra. > (handle_signal): Likewise. > (save_sigcontext_fpu): > (save_sigcontext_fpu): Use __put_user (was: __copy_to_user). > Don't need to protect from interrupt. > > * arch/sh/kernel/entry.S (COMPAT_OLD_SYSCALL_ABI): Removed. > (old_abi_system_call): Removed. > (OFF_TRA): Renamed from SYSCALL_NR. > (system_call): Use OFF_TRA and the value is now tra (was: encoded > value). > > Index: arch/sh/kernel/entry.S > =================================================================== > RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/entry.S,v > retrieving revision 1.1.1.1.2.2 > diff -u -3 -p -r1.1.1.1.2.2 entry.S > --- arch/sh/kernel/entry.S 26 Feb 2002 11:42:29 -0000 1.1.1.1.2.2 > +++ arch/sh/kernel/entry.S 28 Mar 2002 05:49:28 -0000 > @@ -15,12 +15,6 @@ > #include <linux/config.h> > > > -/* > - * Define this to turn on compatibility with the previous > - * system call ABI. This feature is not properly maintained. > - */ > -#undef COMPAT_OLD_SYSCALL_ABI > - > ! NOTE: > ! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address > ! to be jumped is too far, but it causes illegal slot exception. > @@ -97,7 +91,7 @@ OFF_R6 = 24 /* New ABI: ar > OFF_R7 = 28 /* New ABI: arg3 */ > OFF_SP = (15*4) > OFF_SR = (16*4+8) > -SYSCALL_NR = (16*4+6*4) > +OFF_TRA = (16*4+6*4) > > > #define k0 r0 > @@ -345,29 +339,15 @@ system_call: > mov.l @r9, r8 > ! > ! Is the trap argument >= 0x20? 
(TRA will be >= 0x80) > - mov #0x20, r9 > - extu.b r9, r9 > - shll2 r9 > - cmp/hs r9, r8 > + mov #0x7f, r9 > + cmp/hi r9, r8 > bt debug_trap > ! > - mov #SYSCALL_NR, r14 > + mov #OFF_TRA, r14 > add r15, r14 > ! > -#ifdef COMPAT_OLD_SYSCALL_ABI > - mov #0x40, r9 > - cmp/hs r9, r8 > - bf/s old_abi_system_call > - nop > -#endif > ! New Syscall ABI > - add #-0x40, r8 > - shlr2 r8 > - shll8 r8 > - shll8 r8 ! r8 = num_args<<16 > - mov r3, r10 > - or r8, r10 ! Encode syscall # and # of arguments > - mov.l r10, @r14 ! set syscall_nr > + mov.l r8, @r14 ! set tra > STI() > ! > stc k_current, r11 > @@ -426,74 +406,6 @@ syscall_ret_trace: > jmp @r1 ! Call syscall_trace() which notifies superior > lds r0, pr ! Then return to ret_from_syscall() > > - > - > -#ifdef COMPAT_OLD_SYSCALL_ABI > -! Handle old ABI system call. > -! Note that ptrace(SYSCALL) is not supported for the old ABI. > -! At this point: > -! r0, r4-7 as per ABI > -! r8 = value of TRA register (= num_args<<2) > -! r14 = points to SYSCALL_NR in stack frame > -old_abi_system_call: > - mov r0, r9 ! Save system call number in r9 > - ! ! arrange for return which > pops stack > - mov.l __old_abi_syscall_ret, r10 > - lds r10, pr > - ! Build the stack frame if TRA > 0 > - mov r8, r10 > - cmp/pl r10 > - bf 0f > - mov.l @(OFF_SP,r15), r0 ! get original user stack > -7: add #-4, r10 > -4: mov.l @(r0,r10), r1 ! May cause address error exception.. > - mov.l r1, @-r15 > - cmp/pl r10 > - bt 7b > -0: > - mov.l r9, @r14 ! set syscall_nr > - STI() > - ! Call the system call handler through the table. > - ! First check for bad syscall number > - mov.l __n_sys, r10 > - cmp/hs r10, r9 > - bf 2f > - ! Bad syscall number > - rts ! return to old_abi_syscall_ret > - mov #-ENOSYS, r0 > - ! Good syscall number > -2: shll2 r9 ! x4 > - mov.l __sct, r11 > - add r11, r9 > - mov.l @r9, r11 > - jmp @r11 ! call specific syscall handler, > - nop > - > - .align 2 > -__old_abi_syscall_ret: > - .long old_abi_syscall_ret > - > - ! 
This code gets called on address error exception when copying > - ! syscall arguments from user stack to kernel stack. It is > - ! supposed to return -EINVAL through old_abi_syscall_ret, but it > - ! appears to have been broken for a long time in that the r0 > - ! return value will be saved into the kernel stack relative to r15 > - ! but the value of r15 is not correct partway through the loop. > - ! So the user prog is returned its old r0 value, not -EINVAL. > - ! Greg Banks 28 Aug 2000. > - .section .fixup,"ax" > -fixup_syscall_argerr: > - ! First get r15 back to > - rts > - mov #-EINVAL, r0 > - .previous > - > - .section __ex_table, "a" > - .align 2 > - .long 4b,fixup_syscall_argerr > - .previous > -#endif > - > .align 2 > __TRA: .long TRA > __syscall_trace: > @@ -529,11 +441,6 @@ ret_from_exception: > nop > > .align 2 > -#ifdef COMPAT_OLD_SYSCALL_ABI > -old_abi_syscall_ret: > - add r8, r15 ! pop off the arguments > - /* fall through */ > -#endif > syscall_ret: > mov.l r0, @(OFF_R0,r15) ! save the return value > /* fall through */ > @@ -685,7 +592,7 @@ handle_exception: > 9: mov #-1, k4 > mov.l 3f, k1 > ! Save the user registers on the stack. > - mov.l k4, @-r15 ! syscall_nr (default: -1) > + mov.l k4, @-r15 ! Set tra (default: -1) > ! 
> sts.l macl, @-r15 > sts.l mach, @-r15 > Index: arch/sh/kernel/process.c > =================================================================== > RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/process.c,v > retrieving revision 1.1.1.1 > diff -u -3 -p -r1.1.1.1 process.c > --- arch/sh/kernel/process.c 15 Oct 2001 20:44:51 -0000 1.1.1.1 > +++ arch/sh/kernel/process.c 28 Mar 2002 05:49:28 -0000 > @@ -179,11 +179,7 @@ int dump_fpu(struct pt_regs *regs, elf_f > > fpvalid = tsk->used_math; > if (fpvalid) { > - unsigned long flags; > - > - save_and_cli(flags); > unlazy_fpu(tsk); > - restore_flags(flags); > memcpy(fpu, &tsk->thread.fpu.hard, sizeof(*fpu)); > } > > @@ -203,15 +199,9 @@ int copy_thread(int nr, unsigned long cl > #if defined(__SH4__) > struct task_struct *tsk = current; > > - if (tsk != &init_task) { > - unsigned long flags; > - > - save_and_cli(flags); > - unlazy_fpu(tsk); > - restore_flags(flags); > - p->thread.fpu = current->thread.fpu; > - p->used_math = tsk->used_math; > - } > + unlazy_fpu(tsk); > + p->thread.fpu = current->thread.fpu; > + p->used_math = tsk->used_math; > #endif > childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned > long) p)) - 1; > *childregs = *regs; > @@ -257,13 +247,7 @@ void dump_thread(struct pt_regs * regs, > void __switch_to(struct task_struct *prev, struct task_struct *next) > { > #if defined(__SH4__) > - if (prev != &init_task) { > - unsigned long flags; > - > - save_and_cli(flags); > - unlazy_fpu(prev); > - restore_flags(flags); > - } > + unlazy_fpu(prev); > #endif > /* > * Restore the kernel mode register > Index: arch/sh/kernel/setup.c > =================================================================== > RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/setup.c,v > retrieving revision 1.1.1.1.2.1 > diff -u -3 -p -r1.1.1.1.2.1 setup.c > --- arch/sh/kernel/setup.c 30 Nov 2001 23:03:33 -0000 1.1.1.1.2.1 > +++ arch/sh/kernel/setup.c 28 Mar 2002 05:49:28 -0000 > @@ -485,9 +485,8 @@ void __init setup_arch(char **cmdline_p) > 
} > > #if defined(__SH4__) > - /* We already grab/initialized FPU in head.S. Make it > consisitent. */ > - init_task.used_math = 1; > - init_task.flags |= PF_USEDFPU; > + init_task.used_math = 0; > + init_task.flags &= ~PF_USEDFPU; > #endif > paging_init(); > } > Index: arch/sh/kernel/signal.c > =================================================================== > RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/signal.c,v > retrieving revision 1.1.1.1 > diff -u -3 -p -r1.1.1.1 signal.c > --- arch/sh/kernel/signal.c 15 Oct 2001 20:44:52 -0000 1.1.1.1 > +++ arch/sh/kernel/signal.c 28 Mar 2002 05:49:28 -0000 > @@ -196,27 +196,20 @@ static inline int restore_sigcontext_fpu > static inline int save_sigcontext_fpu(struct sigcontext *sc) > { > struct task_struct *tsk = current; > - unsigned long flags; > - int val; > > if (!tsk->used_math) { > - val = 0; > - __copy_to_user(&sc->sc_ownedfp, &val, sizeof(int)); > + __put_user(0, &sc->sc_ownedfp); > return 0; > } > > - val = 1; > - __copy_to_user(&sc->sc_ownedfp, &val, sizeof(int)); > + __put_user(1, &sc->sc_ownedfp); > > /* This will cause a "finit" to be triggered by the next > attempted FPU operation by the 'current' process. > */ > tsk->used_math = 0; > > - save_and_cli(flags); > unlazy_fpu(tsk); > - restore_flags(flags); > - > return __copy_to_user(&sc->sc_fpregs[0], &tsk->thread.fpu.hard, > sizeof(long)*(16*2+2)); > } > @@ -255,7 +248,7 @@ restore_sigcontext(struct pt_regs *regs, > } > #endif > > - regs->syscall_nr = -1; /* disable syscall checks */ > + regs->tra = -1; /* disable syscall checks */ > err |= __get_user(*r0_p, &sc->sc_regs[0]); > return err; > } > @@ -524,7 +517,7 @@ handle_signal(unsigned long sig, struct > siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) > { > /* Are we from a system call? */ > - if (regs->syscall_nr >= 0) { > + if (regs->tra >= 0) { > /* If so, check system call restarting.. 
*/ > switch (regs->regs[0]) { > case -ERESTARTNOHAND: > @@ -538,7 +531,6 @@ handle_signal(unsigned long sig, struct > } > /* fallthrough */ > case -ERESTARTNOINTR: > - regs->regs[0] = regs->syscall_nr; > regs->pc -= 2; > } > } > @@ -685,12 +677,11 @@ int do_signal(struct pt_regs *regs, sigs > } > > /* Did we come from a system call? */ > - if (regs->syscall_nr >= 0) { > + if (regs->tra >= 0) { > /* Restart the system call - no handlers present */ > if (regs->regs[0] == -ERESTARTNOHAND || > regs->regs[0] == -ERESTARTSYS || > regs->regs[0] == -ERESTARTNOINTR) { > - regs->regs[0] = regs->syscall_nr; > regs->pc -= 2; > } > } > Index: arch/sh/kernel/time.c > =================================================================== > RCS file: /cvsroot/linuxsh/linux/arch/sh/kernel/time.c,v > retrieving revision 1.1.1.1 > diff -u -3 -p -r1.1.1.1 time.c > --- arch/sh/kernel/time.c 15 Oct 2001 20:44:50 -0000 1.1.1.1 > +++ arch/sh/kernel/time.c 28 Mar 2002 05:49:28 -0000 > @@ -176,6 +176,25 @@ void do_settimeofday(struct timeval *tv) > /* last time the RTC clock got updated */ > static long last_rtc_update; > > +static __inline__ void sh_do_profile (unsigned long pc) > +{ > + extern int _stext; > + > + if (!prof_buffer) > + return; > + > + pc -= (unsigned long) &_stext; > + pc >>= prof_shift; > + /* > + * Don't ignore out-of-bounds PC values silently, > + * put them into the last histogram slot, so if > + * present, they will show up as a sharp peak. 
> + */ > + if (pc > prof_len-1) > + pc = prof_len-1; > + prof_buffer[pc]++; > +} > + > /* > * timer_interrupt() needs to keep up the real-time clock, > * as well as call the "do_timer()" routine every clocktick > Index: arch/sh/mm/Makefile > =================================================================== > RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/Makefile,v > retrieving revision 1.1.1.1 > diff -u -3 -p -r1.1.1.1 Makefile > --- arch/sh/mm/Makefile 15 Oct 2001 20:44:53 -0000 1.1.1.1 > +++ arch/sh/mm/Makefile 28 Mar 2002 05:49:28 -0000 > @@ -11,7 +11,7 @@ O_TARGET := mm.o > obj-y := init.o fault.o extable.o clear_page.o copy_page.o > > obj-$(CONFIG_CPU_SH3) += cache-sh3.o > -obj-$(CONFIG_CPU_SH4) += cache-sh4.o __clear_user_page-sh4.o > __copy_user_page-sh4.o ioremap.o > +obj-$(CONFIG_CPU_SH4) += cache-sh4.o ioremap.o > > USE_STANDARD_AS_RULE := true > > Index: arch/sh/mm/cache-sh4.c > =================================================================== > RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/cache-sh4.c,v > retrieving revision 1.1.1.1.2.3 > diff -u -3 -p -r1.1.1.1.2.3 cache-sh4.c > --- arch/sh/mm/cache-sh4.c 22 Mar 2002 14:02:09 -0000 1.1.1.1.2.3 > +++ arch/sh/mm/cache-sh4.c 28 Mar 2002 05:49:28 -0000 > @@ -1,9 +1,8 @@ > /* $Id: cache-sh4.c,v 1.1.1.1.2.3 2002/03/22 14:02:09 gniibe Exp $ > * > - * linux/arch/sh/mm/cache.c > + * linux/arch/sh/mm/cache-sh4.c > * > - * Copyright (C) 1999, 2000 Niibe Yutaka > - * Copyright (C) 2001, 2002 Paul Mundt > + * Copyright (C) 1999, 2000, 2002 Niibe Yutaka > */ > > #include <linux/config.h> > @@ -208,55 +207,34 @@ void flush_cache_sigtramp(unsigned long > restore_flags(flags); > } > > -/* > - * Writeback&Invalidate the D-cache of the page > - */ > -static void __flush_dcache_page(unsigned long phys) > -{ > - unsigned long addr, data; > - unsigned long flags; > - > - phys |= CACHE_VALID; > - > - save_and_cli(flags); > - jump_to_P2(); > - > - /* Loop all the D-cache */ > - for (addr = CACHE_OC_ADDRESS_ARRAY; > - addr < 
(CACHE_OC_ADDRESS_ARRAY > - +(CACHE_OC_NUM_ENTRIES<< CACHE_OC_ENTRY_SHIFT)); > - addr += (1<<CACHE_OC_ENTRY_SHIFT)) { > - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); > - if (data == phys) > - ctrl_outl(0, addr); > - } > - > - back_to_P1(); > - restore_flags(flags); > -} > - > -static void __flush_icache_page(unsigned long phys) > +static inline void flush_cache_4096(unsigned long start, > + unsigned long phys) > { > +#if defined(CONFIG_CPU_SUBTYPE_SH7750) > + register unsigned long addr __asm__ ("r4"); > + register unsigned long data __asm__ ("r0"); > + register unsigned long __r5 __asm__ ("r5") = phys; > + register unsigned long __r6 __asm__ ("r6") = > (0x1ffff000|CACHE_VALID); > + register unsigned long __r7 __asm__ ("r7") = 0; > + extern void __flush_cache_4096(unsigned long, unsigned long); > + > + asm volatile("jsr @%1; nop" > + : "=r" (addr), "=r" (data) > + : "0" (start), "1" (__flush_cache_4096 + 0x20000000), > + "r" (__r5), "r" (__r6), "r" (__r7) > + : "pr"); > +#else > + /* > + * SH7751 and ST40 have no restriction to handle cache. > + * (While SH7750 must do that at P2 area.) 
> + */ > unsigned long addr, data; > - unsigned long flags; > - > - phys |= CACHE_VALID; > - > - save_and_cli(flags); > - jump_to_P2(); > - > - /* Loop all the I-cache */ > - for (addr = CACHE_IC_ADDRESS_ARRAY; > - addr < (CACHE_IC_ADDRESS_ARRAY > - +(CACHE_IC_NUM_ENTRIES<< CACHE_IC_ENTRY_SHIFT)); > - addr += (1<<CACHE_IC_ENTRY_SHIFT)) { > + for (addr = start; addr < start + 4096; addr += 32) { > data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); > if (data == phys) > ctrl_outl(0, addr); > } > - > - back_to_P1(); > - restore_flags(flags); > +#endif > } > > /* > @@ -265,8 +243,22 @@ static void __flush_icache_page(unsigned > */ > void flush_dcache_page(struct page *page) > { > - if (test_bit(PG_mapped, &page->flags)) > - __flush_dcache_page(PHYSADDR(page_address(page))); > + if (test_bit(PG_mapped, &page->flags)) { > + unsigned long phys = PHYSADDR(page_address(page)); > + unsigned long flags; > + > + phys |= CACHE_VALID; > + > + save_and_cli(flags); > + > + /* Loop all the D-cache */ > + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY, phys); > + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x1000, phys); > + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x2000, phys); > + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | 0x3000, phys); > + > + restore_flags(flags); > + } > } > > void flush_cache_all(void) > @@ -298,15 +290,43 @@ void flush_cache_mm(struct mm_struct *mm > * FIXME: Really, the optimal solution here would be able > to flush out > * individual lines created by the specified context, but this isn't > * feasible for a number of architectures (such as MIPS, and some > - * SPARC) .. is this possible for SuperH? (This is a > non-issue if the > - * SH4 cache is configured in write-through mode). > + * SPARC) .. is this possible for SuperH? > * > - * In the meantime, we'll just flush all of the caches if we have a > - * valid mm context.. this seems to be the simplest way to avoid at > - * least a few wasted cache flushes. 
-Lethal > + * In the meantime, we'll just flush all of the caches.. this > + * seems to be the simplest way to avoid at least a few wasted > + * cache flushes. -Lethal > */ > - if (mm->context != 0) > - flush_cache_all(); > + flush_cache_all(); > +} > + > +static void __flush_cache_page(struct vm_area_struct *vma, > + unsigned long address, > + unsigned long phys) > +{ > + unsigned long flags; > + > + phys |= CACHE_VALID; > + save_and_cli(flags); > + > + /* We only need to flush D-cache when we have alias */ > + if ((address^phys) & CACHE_ALIAS) { > + /* Loop 4K of the D-cache */ > + flush_cache_4096( > + CACHE_OC_ADDRESS_ARRAY | (address & CACHE_ALIAS), > + phys); > + /* Loop another 4K of the D-cache */ > + flush_cache_4096( > + CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS), > + phys); > + } > + > + if (vma->vm_flags & VM_EXEC) > + /* Loop 4K (half) of the I-cache */ > + flush_cache_4096( > + CACHE_IC_ADDRESS_ARRAY | (address & 0x1000), > + phys); > + > + restore_flags(flags); > } > > /* > @@ -321,39 +341,15 @@ void flush_cache_mm(struct mm_struct *mm > void flush_cache_range(struct mm_struct *mm, unsigned long start, > unsigned long end) > { > - unsigned long flags; > - > - if (mm->context == 0) > - return; > - > - start &= PAGE_MASK; > - > - if (!find_vma(mm, start)) > - return; > - if (mm->context != current->active_mm->context) { > - flush_cache_all(); > - } else { > - pgd_t *pgd; > - pmd_t *pmd; > - pte_t *pte; > - > - save_and_cli(flags); > - jump_to_P2(); > - > - for (start; start < end; start += PAGE_SIZE) { > - pgd = pgd_offset(mm, start); > - pmd = pmd_offset(pgd, start); > - pte = pte_offset(pmd, start); > - > - if (pte_val(*pte) & _PAGE_PRESENT) { > - __flush_icache_page(start); > - __flush_dcache_page(start); > - } > - } > - > - back_to_P1(); > - restore_flags(flags); > - } > + /* > + * We could call flush_cache_page for the pages of these > + * range, but it's really time consuming (we have to scan the > + * caches all the time...). 
> + * > + * We can't use A-bit magic, as there's the case we don't have > + * valid entry on TLB. > + */ > + flush_cache_all(); > } > > /* > @@ -367,8 +363,7 @@ void flush_cache_page(struct vm_area_str > pmd_t *pmd; > pte_t *pte; > pte_t entry; > - unsigned long phys, addr, data; > - unsigned long flags; > + unsigned long phys; > > dir = pgd_offset(vma->vm_mm, address); > pmd = pmd_offset(dir, address); > @@ -376,49 +371,11 @@ void flush_cache_page(struct vm_area_str > return; > pte = pte_offset(pmd, address); > entry = *pte; > - if (pte_none(entry) || !pte_present(entry)) > + if (!(pte_val(entry) & _PAGE_PRESENT)) > return; > > phys = pte_val(entry)&PTE_PHYS_MASK; > - > - phys |= CACHE_VALID; > - save_and_cli(flags); > - jump_to_P2(); > - > - /* We only need to flush D-cache when we have alias */ > - if ((address^phys) & CACHE_ALIAS) { > - /* Loop 4K of the D-cache */ > - for (addr = CACHE_OC_ADDRESS_ARRAY | (address & > CACHE_ALIAS); > - addr < (CACHE_OC_ADDRESS_ARRAY + (address & > CACHE_ALIAS) > - > +(CACHE_OC_NUM_ENTRIES/4<<CACHE_OC_ENTRY_SHIFT)); > - addr += (1<<CACHE_OC_ENTRY_SHIFT)) { > - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); > - if (data == phys) > - ctrl_outl(0, addr); > - } > - /* Loop another 4K of the D-cache */ > - for (addr = CACHE_OC_ADDRESS_ARRAY | (phys & CACHE_ALIAS); > - addr < (CACHE_OC_ADDRESS_ARRAY + (phys & CACHE_ALIAS) > - > +(CACHE_OC_NUM_ENTRIES/4<<CACHE_OC_ENTRY_SHIFT)); > - addr += (1<<CACHE_OC_ENTRY_SHIFT)) { > - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); > - if (data == phys) > - ctrl_outl(0, addr); > - } > - } > - > - if (vma->vm_flags & VM_EXEC) > - /* Loop 4K of the I-cache */ > - for (addr = CACHE_IC_ADDRESS_ARRAY|(address&0x1000); > - addr < ((CACHE_IC_ADDRESS_ARRAY|(address&0x1000)) > - > +(CACHE_IC_NUM_ENTRIES/2<<CACHE_IC_ENTRY_SHIFT)); > - addr += (1<<CACHE_IC_ENTRY_SHIFT)) { > - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); > - if (data == phys) > - ctrl_outl(0, addr); > - } > - back_to_P1(); > - 
restore_flags(flags); > + __flush_cache_page(vma, address, phys); > } > > /* > Index: arch/sh/mm/clear_page.S > =================================================================== > RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/clear_page.S,v > retrieving revision 1.1.1.1 > diff -u -3 -p -r1.1.1.1 clear_page.S > --- arch/sh/mm/clear_page.S 15 Oct 2001 20:44:53 -0000 1.1.1.1 > +++ arch/sh/mm/clear_page.S 28 Mar 2002 05:49:28 -0000 > @@ -1,10 +1,13 @@ > /* $Id: clear_page.S,v 1.1.1.1 2001/10/15 20:44:53 mrbrown Exp $ > * > - * clear_page implementation of SuperH > + * __clear_user_page, __clear_user, clear_page implementation of SuperH > * > - * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima > + * Copyright (C) 2001 Kaz Kojima > + * Copyright (C) 2001, 2002 Niibe Yutaka > * > */ > +#include <linux/config.h> > +#include <linux/linkage.h> > > /* > * clear_page > @@ -18,7 +21,6 @@ > * r4 --- to > * r5 --- to + 4096 > */ > -#include <linux/linkage.h> > ENTRY(clear_page) > mov r4,r5 > mov.w .Llimit,r0 > @@ -50,3 +52,154 @@ ENTRY(clear_page) > rts > nop > .Llimit: .word (4096-28) > + > +ENTRY(__clear_user) > + ! > + mov #0, r0 > + mov #0xe0, r1 ! 0xffffffe0 > + ! > + ! r4..r4&~32 -------- not aligned [ Area 0 ] > + ! r4&~32..(r4+r5)&~32 -------- aligned [ Area 1 ] > + ! (r4+r5)&~32..r4+r5 -------- not aligned [ Area 2 ] > + ! > + ! Clear area 0 > + mov r4, r2 > + and r1, r2 > + cmp/eq r4, r2 > + bt/s area1 > + mov r4, r3 > + sub r2, r3 > + mov r4, r2 > + ! > +l0: dt r3 > +0: mov.b r0, @r2 > + bf/s l0 > + add #1, r2 > + ! > + mov r4, r3 > + add r5, r3 > + and r1, r3 > + ! > + ! 
Clear area 1 > +area1: > +#if defined(__SH4__) > +1: movca.l r0, @r2 > +#else > +1: mov.l r0, @r2 > +#endif > + add #4, r2 > +2: mov.l r0, @r2 > + add #4, r2 > +3: mov.l r0, @r2 > + add #4, r2 > +4: mov.l r0, @r2 > + add #4, r2 > +5: mov.l r0, @r2 > + add #4, r2 > +6: mov.l r0, @r2 > + add #4, r2 > +7: mov.l r0, @r2 > + add #4, r2 > +8: mov.l r0, @r2 > + add #4, r2 > + cmp/hi r2, r3 > + bt/s 1b > + nop > + ! > + ! Clear area 2 > + add r5, r4 > + cmp/eq r4, r2 > + bt/s done > + sub r2, r4 > +l2: dt r4 > +9: mov.b r0, @r2 > + bf/s l2 > + add #1, r2 > + ! > +done: rts > + nop ! return 0 as normal return > + > + ! return the number of bytes remained > +bad_clear_user: > + mov r4, r0 > + mov r5, r0 > + rts > + sub r2, r0 > + > +.section __ex_table,"a" > + .align 2 > + .long 0b, bad_clear_user > + .long 1b, bad_clear_user > + .long 2b, bad_clear_user > + .long 3b, bad_clear_user > + .long 4b, bad_clear_user > + .long 5b, bad_clear_user > + .long 6b, bad_clear_user > + .long 7b, bad_clear_user > + .long 8b, bad_clear_user > + .long 9b, bad_clear_user > +.previous > + > +#if defined(__SH4__) > +/* > + * __clear_user_page > + * @to: P1 address (with same color) > + * @orig_to: P1 address > + * > + * void __clear_user_page(void *to, void *orig_to) > + */ > + > +/* > + * r0 --- scratch > + * r4 --- to > + * r5 --- orig_to > + * r6 --- to + 4096 > + */ > +ENTRY(__clear_user_page) > + mov.w .L4096,r0 > + mov r4,r6 > + add r0,r6 > + mov #0,r0 > + ! > +1: ocbi @r5 > + add #32,r5 > + movca.l r0,@r4 > + mov r4,r1 > + add #32,r4 > + mov.l r0,@-r4 > + mov.l r0,@-r4 > + mov.l r0,@-r4 > + mov.l r0,@-r4 > + mov.l r0,@-r4 > + mov.l r0,@-r4 > + mov.l r0,@-r4 > + add #28,r4 > + cmp/eq r6,r4 > + bf/s 1b > + ocbwb @r1 > + ! 
> + rts > + nop > +.L4096: .word 4096 > + > +#if defined(CONFIG_CPU_SUBTYPE_SH7750) > +ENTRY(__flush_cache_4096) > + .rept 128 > + mov.l @r4,r0 > + and r6,r0 > + cmp/eq r5,r0 > + bf 1f > + mov.l r7,@r4 > +1: add #32,r4 > + .endr > + nop > + nop > + nop > + nop > + nop > + nop > + nop > + rts > + nop > +#endif > +#endif > Index: arch/sh/mm/copy_page.S > =================================================================== > RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/copy_page.S,v > retrieving revision 1.1.1.1 > diff -u -3 -p -r1.1.1.1 copy_page.S > --- arch/sh/mm/copy_page.S 15 Oct 2001 20:44:53 -0000 1.1.1.1 > +++ arch/sh/mm/copy_page.S 28 Mar 2002 05:49:28 -0000 > @@ -1,10 +1,11 @@ > /* $Id: copy_page.S,v 1.1.1.1 2001/10/15 20:44:53 mrbrown Exp $ > * > - * copy_page implementation of SuperH > + * copy_page, __copy_user_page implementation of SuperH > * > * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima > * > */ > +#include <linux/linkage.h> > > /* > * copy_page > @@ -21,7 +22,6 @@ > * r10 --- to > * r11 --- from > */ > -#include <linux/linkage.h> > ENTRY(copy_page) > mov.l r8,@-r15 > mov.l r10,@-r15 > @@ -66,4 +66,67 @@ ENTRY(copy_page) > mov.l @r15+,r8 > rts > nop > + > +#if defined(__SH4__) > +/* > + * __copy_user_page > + * @to: P1 address (with same color) > + * @from: P1 address > + * @orig_to: P1 address > + * > + * void __copy_user_page(void *to, void *from, void *orig_to) > + */ > + > +/* > + * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch > + * r8 --- from + 4096 > + * r9 --- orig_to > + * r10 --- to > + * r11 --- from > + */ > +#include <linux/linkage.h> > +ENTRY(__copy_user_page) > + mov.l r8,@-r15 > + mov.l r9,@-r15 > + mov.l r10,@-r15 > + mov.l r11,@-r15 > + mov r4,r10 > + mov r5,r11 > + mov r6,r9 > + mov r5,r8 > + mov.w .L4096,r0 > + add r0,r8 > + ! 
> +1: ocbi @r9 > + add #32,r9 > + mov.l @r11+,r0 > + mov.l @r11+,r1 > + mov.l @r11+,r2 > + mov.l @r11+,r3 > + mov.l @r11+,r4 > + mov.l @r11+,r5 > + mov.l @r11+,r6 > + mov.l @r11+,r7 > + movca.l r0,@r10 > + mov r10,r0 > + add #32,r10 > + mov.l r7,@-r10 > + mov.l r6,@-r10 > + mov.l r5,@-r10 > + mov.l r4,@-r10 > + mov.l r3,@-r10 > + mov.l r2,@-r10 > + mov.l r1,@-r10 > + ocbwb @r0 > + cmp/eq r11,r8 > + bf/s 1b > + add #28,r10 > + ! > + mov.l @r15+,r11 > + mov.l @r15+,r10 > + mov.l @r15+,r9 > + mov.l @r15+,r8 > + rts > + nop > +#endif > .L4096: .word 4096 > Index: include/asm-sh/hw_irq.h > =================================================================== > RCS file: /cvsroot/linuxsh/linux/include/asm-sh/hw_irq.h,v > retrieving revision 1.1.1.1 > diff -u -3 -p -r1.1.1.1 hw_irq.h > --- include/asm-sh/hw_irq.h 15 Oct 2001 20:45:08 -0000 1.1.1.1 > +++ include/asm-sh/hw_irq.h 28 Mar 2002 05:49:28 -0000 > @@ -1,6 +1,4 @@ > #ifndef __ASM_SH_HW_IRQ_H > #define __ASM_SH_HW_IRQ_H > -static __inline__ void sh_do_profile (unsigned long pc) {/*Not > implemented yet*/} > - > static __inline__ void hw_resend_irq(struct hw_interrupt_type > *h, unsigned int i) { /* Nothing to do */ } > #endif /* __ASM_SH_HW_IRQ_H */ > Index: include/asm-sh/mmu_context.h > =================================================================== > RCS file: /cvsroot/linuxsh/linux/include/asm-sh/mmu_context.h,v > retrieving revision 1.1.1.1 > diff -u -3 -p -r1.1.1.1 mmu_context.h > --- include/asm-sh/mmu_context.h 15 Oct 2001 20:45:10 -0000 > 1.1.1.1 > +++ include/asm-sh/mmu_context.h 28 Mar 2002 05:49:28 -0000 > @@ -29,19 +29,32 @@ extern unsigned long mmu_context_cache; > */ > #define MMU_VPN_MASK 0xfffff000 > > +/* > + * Get MMU context if needed. 
> + */ > static __inline__ void > -get_new_mmu_context(struct mm_struct *mm) > +get_mmu_context(struct mm_struct *mm) > { > extern void flush_tlb_all(void); > + unsigned long mc = mmu_context_cache; > > - unsigned long mc = ++mmu_context_cache; > + /* Check if we have old version of context. */ > + if (((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) == 0) > + /* It's up to date, do nothing */ > + return; > > + /* It's old, we need to get new context with new version. */ > + mc = ++mmu_context_cache; > if (!(mc & MMU_CONTEXT_ASID_MASK)) { > - /* We exhaust ASID of this version. > - Flush all TLB and start new cycle. */ > + /* > + * We exhaust ASID of this version. > + * Flush all TLB and start new cycle. > + */ > flush_tlb_all(); > - /* Fix version if needed. > - Note that we avoid version #0 to distingush > NO_CONTEXT. */ > + /* > + * Fix version; Note that we avoid version #0 > + * to distingush NO_CONTEXT. > + */ > if (!mc) > mmu_context_cache = mc = MMU_CONTEXT_FIRST_VERSION; > } > @@ -49,21 +62,6 @@ get_new_mmu_context(struct mm_struct *mm > } > > /* > - * Get MMU context if needed. > - */ > -static __inline__ void > -get_mmu_context(struct mm_struct *mm) > -{ > - if (mm) { > - unsigned long mc = mmu_context_cache; > - /* Check if we have old version of context. > - If it's old, we need to get new context with new > version. */ > - if ((mm->context ^ mc) & MMU_CONTEXT_VERSION_MASK) > - get_new_mmu_context(mm); > - } > -} > - > -/* > * Initialize the context related info for a new mm_struct > * instance. 
> */ > @@ -169,8 +167,6 @@ static __inline__ void switch_mm(struct > if (prev != next) { > unsigned long __pgdir = (unsigned long)next->pgd; > > - clear_bit(cpu, &prev->cpu_vm_mask); > - set_bit(cpu, &next->cpu_vm_mask); > __asm__ __volatile__("mov.l %0, %1" > : /* no output */ > : "r" (__pgdir), "m" (__m(MMU_TTB))); > @@ -185,4 +181,5 @@ static __inline__ void > enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, > unsigned cpu) > { > } > + > #endif /* __ASM_SH_MMU_CONTEXT_H */ > Index: include/asm-sh/ptrace.h > =================================================================== > RCS file: /cvsroot/linuxsh/linux/include/asm-sh/ptrace.h,v > retrieving revision 1.1.1.1 > diff -u -3 -p -r1.1.1.1 ptrace.h > --- include/asm-sh/ptrace.h 15 Oct 2001 20:45:11 -0000 1.1.1.1 > +++ include/asm-sh/ptrace.h 28 Mar 2002 05:49:28 -0000 > @@ -61,7 +61,7 @@ struct pt_regs { > unsigned long gbr; > unsigned long mach; > unsigned long macl; > - long syscall_nr; > + long tra; > }; > > #ifdef __KERNEL__ > Index: include/asm-sh/uaccess.h > =================================================================== > RCS file: /cvsroot/linuxsh/linux/include/asm-sh/uaccess.h,v > retrieving revision 1.1.1.1.2.1 > diff -u -3 -p -r1.1.1.1.2.1 uaccess.h > --- include/asm-sh/uaccess.h 3 Nov 2001 00:52:47 -0000 1.1.1.1.2.1 > +++ include/asm-sh/uaccess.h 28 Mar 2002 05:49:28 -0000 > @@ -308,39 +308,11 @@ __copy_res; }) > __copy_user((void *)(to), \ > (void *)(from), n) > > -/* XXX: Not sure it works well.. > - should be such that: 4byte clear and the rest. 
*/ > -static __inline__ __kernel_size_t > -__clear_user(void *addr, __kernel_size_t size) > -{ > - unsigned long __a; > - > - __asm__ __volatile__( > - "9:\n\t" > - "dt %0\n" > - "1:\n\t" > - "mov.b %4, @%1\n\t" > - "bf/s 9b\n\t" > - " add #1, %1\n" > - "2:\n" > - ".section .fixup,\"ax\"\n" > - "3:\n\t" > - "mov.l 4f, %1\n\t" > - "jmp @%1\n\t" > - " nop\n" > - ".balign 4\n" > - "4: .long 2b\n" > - ".previous\n" > - ".section __ex_table,\"a\"\n" > - " .balign 4\n" > - " .long 1b,3b\n" > - ".previous" > - : "=r" (size), "=r" (__a) > - : "0" (size), "1" (addr), "r" (0) > - : "memory", "t"); > - > - return size; > -} > +/* > + * Clear the area and return remaining number of bytes > + * (on failure. Usually it's 0.) > + */ > +extern __kernel_size_t __clear_user(void *addr, __kernel_size_t size); > > #define clear_user(addr,n) ({ \ > void * __cl_addr = (addr); \ > @@ -396,8 +368,6 @@ if(__access_ok(__sfu_src, __sfu_count)) > __sfu_res = __strncpy_from_user((unsigned long) (dest), > __sfu_src, __sfu_count); \ > } __sfu_res; }) > > -#define strlen_user(str) strnlen_user(str, ~0UL >> 1) > - > /* > * Return the size of a string (including the ending 0!) > */ > @@ -436,10 +406,18 @@ static __inline__ long __strnlen_user(co > > static __inline__ long strnlen_user(const char *s, long n) > { > - if (!__addr_ok(s)) > + if (!access_ok(VERIFY_READ, s, n)) > return 0; > else > return __strnlen_user(s, n); > +} > + > +static __inline__ long strlen_user(const char *s) > +{ > + if (!access_ok(VERIFY_READ, s, 0)) > + return 0; > + else > + return __strnlen_user(s, ~0UL >> 1); > } > > struct exception_table_entry > > _______________________________________________ > linuxsh-dev mailing list > lin...@li... > https://lists.sourceforge.net/lists/listinfo/linuxsh-dev > > |
From: NIIBE Y. <gn...@m1...> - 2002-04-01 00:59:37
|
Dustin McIntire wrote: > I've just tried Niibe-san's backport of the cache changes for the 2.4 > kernel. I'm running on a SH7751 based system, and I've run into some > trouble. It seems I get a kernel oops (Segmentation fault) in > __flush_cache_page when using the /bin/ps binary. The oops goes away if I > simply force the CONFIG_CPU_SUBTYPE_SH7750 defines to true and use the 7750 > cache method. I'm still working to getting a better idea what is causing > the oops. Thanks for the testing. Umm... I've read the hardware manuals of the SH7750 and SH7751. I couldn't find any difference between them in the description of the P2 requirement for cache handling. I'll test this with the SH7751 SolutionEngine. IIRC, Takashi Yoshii has also mentioned this. I'm not sure whether it's a future plan or the actual implementation, though. David, I'd like to confirm. Is it OK for ST40STB1? -- |
From: NIIBE Y. <gn...@m1...> - 2002-04-01 01:58:18
|
Here's the change. 2002-04-01 NIIBE Yutaka <gn...@m1...> * arch/sh/mm/cache-sh4.c (flush_cache_4096_all, flush_cache_4096): Changed the compile time condition for ST40. SH7751 needs to be P2 when handling cache. * arch/sh/mm/clear_page.S (__flush_cache_4096_all): Renamed from __flush_cache_4096_nocheck. Index: arch/sh/mm/cache-sh4.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/cache-sh4.c,v retrieving revision 1.10 diff -u -3 -p -r1.10 cache-sh4.c --- arch/sh/mm/cache-sh4.c 1 Apr 2002 01:29:43 -0000 1.10 +++ arch/sh/mm/cache-sh4.c 1 Apr 2002 01:55:09 -0000 @@ -208,37 +208,48 @@ void flush_cache_sigtramp(unsigned long restore_flags(flags); } -static void flush_cache_4096_nocheck(unsigned long start) +static void flush_cache_4096_all(unsigned long start) { -#if defined(CONFIG_CPU_SUBTYPE_SH7750) +#if defined(CONFIG_CPU_SUBTYPE_ST40STB1) + /* + * ST40 have no restriction to handle cache. + * (While SH7750/SH7751 must do that at P2 area.) + */ + unsigned long addr; + for (addr = start; addr < start + 4096; addr += 32) + ctrl_outl(0, addr); +#else register unsigned long __r0 __asm__ ("r0") = 0; register unsigned long __r1 __asm__ ("r1") = 128; register unsigned long __r4 __asm__ ("r4"); register unsigned long __r5 __asm__ ("r5"); register unsigned long __r6 __asm__ ("r6"); register unsigned long __r7 __asm__ ("r7"); - extern void __flush_cache_4096_nocheck(unsigned long); + extern void __flush_cache_4096_all(unsigned long); asm volatile("jsr @%7; nop" : "=&r" (__r4), "=&r" (__r5), "=&r" (__r6), "=&r" (__r7) : "0" (start), "r" (__r0), "r" (__r1), - "r" (__flush_cache_4096_nocheck + 0x20000000) + "r" (__flush_cache_4096_all + 0x20000000) : "pr"); -#else - /* - * SH7751 and ST40 have no restriction to handle cache. - * (While SH7750 must do that at P2 area.) 
- */ - unsigned long addr; - for (addr = start; addr < start + 4096; addr += 32) - ctrl_outl(0, addr); #endif } static inline void flush_cache_4096(unsigned long start, unsigned long phys) { -#if defined(CONFIG_CPU_SUBTYPE_SH7750) +#if defined(CONFIG_CPU_SUBTYPE_ST40STB1) + /* + * ST40 have no restriction to handle cache. + * (While SH7750/SH7751 must do that at P2 area.) + */ + unsigned long addr, data; + for (addr = start; addr < start + 4096; addr += 32) { + data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); + if (data == phys) + ctrl_outl(0, addr); + } +#else register unsigned long addr __asm__ ("r4"); register unsigned long data __asm__ ("r0"); register unsigned long __r5 __asm__ ("r5") = phys; @@ -251,17 +262,6 @@ static inline void flush_cache_4096(unsi : "0" (start), "1" (__flush_cache_4096 + 0x20000000), "r" (__r5), "r" (__r6), "r" (__r7) : "pr"); -#else - /* - * SH7751 and ST40 have no restriction to handle cache. - * (While SH7750 must do that at P2 area.) - */ - unsigned long addr, data; - for (addr = start; addr < start + 4096; addr += 32) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } #endif } @@ -301,13 +301,15 @@ static inline void flush_icache_all(void restore_flags(flags); } +#undef C_IMPLEMENTATION_OF_CACHE_ALL + void flush_cache_all(void) { extern unsigned long empty_zero_page[1024]; /* Prefetch the data to write back D-cache */ -#if C_IMPLEMENTATION +#ifdef C_IMPLEMENTATION_OF_CACHE_ALL unsigned long addr; for (addr = (unsigned long)empty_zero_page; @@ -435,13 +437,13 @@ void flush_cache_range(struct vm_area_st } while (p < end); loop_exit: if (d & 1) - flush_cache_4096_nocheck(CACHE_OC_ADDRESS_ARRAY); + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY); if (d & 2) - flush_cache_4096_nocheck(CACHE_OC_ADDRESS_ARRAY | 0x1000); + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY | 0x1000); if (d & 4) - flush_cache_4096_nocheck(CACHE_OC_ADDRESS_ARRAY | 0x2000); + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY | 
0x2000); if (d & 8) - flush_cache_4096_nocheck(CACHE_OC_ADDRESS_ARRAY | 0x3000); + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY | 0x3000); if (vma->vm_flags & VM_EXEC) flush_icache_all(); } Index: arch/sh/mm/clear_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/clear_page.S,v retrieving revision 1.4 diff -u -3 -p -r1.4 clear_page.S --- arch/sh/mm/clear_page.S 1 Apr 2002 01:29:43 -0000 1.4 +++ arch/sh/mm/clear_page.S 1 Apr 2002 01:55:09 -0000 @@ -182,7 +182,7 @@ ENTRY(__clear_user_page) nop .L4096: .word 4096 -#if defined(CONFIG_CPU_SUBTYPE_SH7750) +#if !defined(CONFIG_CPU_SUBTYPE_ST40STB1) /* SH7750 or SH7751 */ ENTRY(__flush_cache_4096) .rept 128 mov.l @r4,r0 @@ -202,7 +202,7 @@ ENTRY(__flush_cache_4096) rts nop -ENTRY(__flush_cache_4096_nocheck) +ENTRY(__flush_cache_4096_all) mov r4,r5 mov r4,r6 mov r4,r7 Index: arch/sh/mm/copy_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/copy_page.S,v retrieving revision 1.2 diff -u -3 -p -r1.2 copy_page.S --- arch/sh/mm/copy_page.S 27 Mar 2002 09:30:48 -0000 1.2 +++ arch/sh/mm/copy_page.S 1 Apr 2002 01:55:09 -0000 @@ -84,7 +84,6 @@ ENTRY(copy_page) * r10 --- to * r11 --- from */ -#include <linux/linkage.h> ENTRY(__copy_user_page) mov.l r8,@-r15 mov.l r9,@-r15 |
From: NIIBE Y. <gn...@m1...> - 2002-04-01 02:17:14
|
And here's the change for 2.4. 2002-04-01 NIIBE Yutaka <gn...@m1...> * arch/sh/mm/cache-sh4.c (flush_cache_4096_all): New function. (flush_icache_all): New function. (flush_cache_4096): Changed the ifdef condition only for ST40STB1. (flush_cache_all, flush_cache_range): New implementation using flush_icache_all and flush_cache_4096_all. * arch/sh/mm/clear_page.S (__flush_cache_4096_all): New function. Index: arch/sh/mm/cache-sh4.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/cache-sh4.c,v retrieving revision 1.1.1.1.2.4 diff -u -3 -p -r1.1.1.1.2.4 cache-sh4.c --- arch/sh/mm/cache-sh4.c 29 Mar 2002 00:01:08 -0000 1.1.1.1.2.4 +++ arch/sh/mm/cache-sh4.c 1 Apr 2002 02:13:31 -0000 @@ -207,10 +207,48 @@ void flush_cache_sigtramp(unsigned long restore_flags(flags); } +static void flush_cache_4096_all(unsigned long start) +{ +#if defined(CONFIG_CPU_SUBTYPE_ST40STB1) + /* + * ST40 have no restriction to handle cache. + * (While SH7750/SH7751 must do that at P2 area.) + */ + unsigned long addr; + for (addr = start; addr < start + 4096; addr += 32) + ctrl_outl(0, addr); +#else + register unsigned long __r0 __asm__ ("r0") = 0; + register unsigned long __r1 __asm__ ("r1") = 128; + register unsigned long __r4 __asm__ ("r4"); + register unsigned long __r5 __asm__ ("r5"); + register unsigned long __r6 __asm__ ("r6"); + register unsigned long __r7 __asm__ ("r7"); + extern void __flush_cache_4096_all(unsigned long); + + asm volatile("jsr @%7; nop" + : "=&r" (__r4), "=&r" (__r5), "=&r" (__r6), "=&r" (__r7) + : "0" (start), "r" (__r0), "r" (__r1), + "r" (__flush_cache_4096_all + 0x20000000) + : "pr"); +#endif +} + static inline void flush_cache_4096(unsigned long start, unsigned long phys) { -#if defined(CONFIG_CPU_SUBTYPE_SH7750) +#if defined(CONFIG_CPU_SUBTYPE_ST40STB1) + /* + * ST40 have no restriction to handle cache. + * (While SH7750/SH7751 must do that at P2 area.) 
+ */ + unsigned long addr, data; + for (addr = start; addr < start + 4096; addr += 32) { + data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); + if (data == phys) + ctrl_outl(0, addr); + } +#else register unsigned long addr __asm__ ("r4"); register unsigned long data __asm__ ("r0"); register unsigned long __r5 __asm__ ("r5") = phys; @@ -223,17 +261,6 @@ static inline void flush_cache_4096(unsi : "0" (start), "1" (__flush_cache_4096 + 0x20000000), "r" (__r5), "r" (__r6), "r" (__r7) : "pr"); -#else - /* - * SH7751 and ST40 have no restriction to handle cache. - * (While SH7750 must do that at P2 area.) - */ - unsigned long addr, data; - for (addr = start; addr < start + 4096; addr += 32) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } #endif } @@ -261,25 +288,56 @@ void flush_dcache_page(struct page *page } } -void flush_cache_all(void) +static inline void flush_icache_all(void) { - extern unsigned long empty_zero_page[1024]; unsigned long flags; - unsigned long addr; save_and_cli(flags); + jump_to_P2(); + /* Flush I-cache */ + ctrl_outl(CCR_CACHE_VAL|CCR_CACHE_ICI, CCR); + back_to_P1(); + restore_flags(flags); +} + +#undef C_IMPLEMENTATION_OF_CACHE_ALL + +void flush_cache_all(void) +{ + extern unsigned long empty_zero_page[1024]; /* Prefetch the data to write back D-cache */ + +#ifdef C_IMPLEMENTATION_OF_CACHE_ALL + unsigned long addr; + for (addr = (unsigned long)empty_zero_page; addr < (unsigned long)empty_zero_page + 1024*16; addr += L1_CACHE_BYTES) asm volatile("pref @%0"::"r" (addr)); - - jump_to_P2(); - /* Flush D-cache/I-cache */ - ctrl_outl(CCR_CACHE_INIT, CCR); - back_to_P1(); - restore_flags(flags); +#else + unsigned long a0, a1, a2, a3, cnt; + asm volatile( + "mov %0, %1; add #32, %1\n\t" + "mov %0, %2; add #64, %2\n\t" + "mov %1, %3; add #64, %3\n\t" + "1:\n\t" + "pref @%0\n\t" + "dt %4\n\t" + "pref @%1\n\t" + "add %5, %0\n\t" + "pref @%2\n\t" + "add %5, %1\n\t" + "pref @%3\n\t" + "add %5, %2\n\t" + "bf/s 
1b\n\t" + " add %5, %3" + : "=&r" (a0), "=&r" (a1), "=&r" (a2), "=&r" (a3), "=&r" (cnt) + : "r" (32*4), "0" (empty_zero_page), "4" (1024*16/32/4) + : "t"); + } +#endif + flush_icache_all(); } void flush_cache_mm(struct mm_struct *mm) @@ -341,15 +399,51 @@ static void __flush_cache_page(struct vm void flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end) { - /* - * We could call flush_cache_page for the pages of these - * range, but it's really time consuming (we have to scan the - * caches all the time...). - * - * We can't use A-bit magic, as there's the case we don't have - * valid entry on TLB. - */ - flush_cache_all(); + unsigned long p = start & PAGE_MASK; + pgd_t *dir; + pmd_t *pmd; + pte_t *pte; + pte_t entry; + unsigned long phys; + unsigned long d = 0; + + dir = pgd_offset(mm, p); + pmd = pmd_offset(dir, p); + + do { + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + p &= ~((1 << PMD_SHIFT) -1); + p += (1 << PMD_SHIFT); + pmd++; + continue; + } + pte = pte_offset(pmd, p); + do { + entry = *pte; + if ((pte_val(entry) & _PAGE_PRESENT)) { + phys = pte_val(entry)&PTE_PHYS_MASK; + if ((p^phys) & CACHE_ALIAS) { + d |= 1 << ((p & CACHE_ALIAS)>>12); + d |= 1 << ((phys & CACHE_ALIAS)>>12); + if (d == 0x0f) + goto loop_exit; + } + } + pte++; + p += PAGE_SIZE; + } while (p < end && (unsigned long)pte & PAGE_MASK); + pmd++; + } while (p < end); + loop_exit: + if (d & 1) + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY); + if (d & 2) + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY | 0x1000); + if (d & 4) + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY | 0x2000); + if (d & 8) + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY | 0x3000); + flush_icache_all(); } /* Index: arch/sh/mm/clear_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/clear_page.S,v retrieving revision 1.1.1.1.2.1 diff -u -3 -p -r1.1.1.1.2.1 clear_page.S --- arch/sh/mm/clear_page.S 29 Mar 2002 00:01:08 -0000 1.1.1.1.2.1 +++ 
arch/sh/mm/clear_page.S 1 Apr 2002 02:13:31 -0000 @@ -182,7 +182,7 @@ ENTRY(__clear_user_page) nop .L4096: .word 4096 -#if defined(CONFIG_CPU_SUBTYPE_SH7750) +#if !defined(CONFIG_CPU_SUBTYPE_ST40STB1) /* SH7750 or SH7751 */ ENTRY(__flush_cache_4096) .rept 128 mov.l @r4,r0 @@ -191,6 +191,33 @@ ENTRY(__flush_cache_4096) bf 1f mov.l r7,@r4 1: add #32,r4 + .endr + nop + nop + nop + nop + nop + nop + nop + rts + nop + +ENTRY(__flush_cache_4096_all) + mov r4,r5 + mov r4,r6 + mov r4,r7 + add #32,r5 + add #-64,r6 + add #-32,r7 + .rept 32 + mov.l r0,@r4 + add r1,r6 + mov.l r0,@r5 + add r1,r7 + mov.l r0,@r6 + add r1,r4 + mov.l r0,@r7 + add r1,r5 .endr nop nop Index: arch/sh/mm/copy_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/copy_page.S,v retrieving revision 1.1.1.1.2.1 diff -u -3 -p -r1.1.1.1.2.1 copy_page.S --- arch/sh/mm/copy_page.S 29 Mar 2002 00:01:08 -0000 1.1.1.1.2.1 +++ arch/sh/mm/copy_page.S 1 Apr 2002 02:13:31 -0000 @@ -84,7 +84,6 @@ ENTRY(copy_page) * r10 --- to * r11 --- from */ -#include <linux/linkage.h> ENTRY(__copy_user_page) mov.l r8,@-r15 mov.l r9,@-r15 |
From: David M. <dav...@st...> - 2002-04-02 14:23:18
|
gn...@m1... wrote: > > Thanks for the testing. Umm... I've read the hardware manual of SH7750 and > SH7751. I couldn't find the difference wrt the description of P2 requirement > of cache handling. I'll test this with SH7751 SolutionEngine. > Niibe-san, It is in chapter 4 of the hardware manual, section 4.5 "Memory-mapped Cache Configuration". It says "the OC content can be read and written by a P1 and P2 area program ......". > IIRC, Takashi Yoshii also has mentioned this. I'm not sure if it's > future plan or actual implementation, though. > I believe it is implementation. > David, I'd like to confirm. Is it OK for ST40STB1? Yup, the ST40 is based on the later core version, so it should be fine. -- Dave McKay Software Engineer STMicroelectronics Email: dav...@st... |
From: NIIBE Y. <gn...@m1...> - 2002-04-03 02:24:14
|
David McKay wrote: > It is in chapter 4 of the hardware manual, section 4.5 > "Memory-mapped Cache Configuration". It says "the OC content can be read > and > written by a P1 and P2 area program ......". Thank you. I've now understood. OC can be handled at P1. IC should be handled at P2. So, here is the change for 2.4 (not applying 2002-04-01 change). I'll commit this soon. 2002-04-03 NIIBE Yutaka <gn...@m1...> * arch/sh/mm/cache-sh4.c (flush_cache_4096_all): New function. (flush_icache_all): New function. (flush_cache_4096): Changed the ifdef condition and I-cache handling. (flush_cache_all, flush_cache_range): New implementation using flush_icache_all and flush_cache_4096_all. * arch/sh/mm/clear_page.S (__flush_cache_4096_all): New function. Index: arch/sh/mm/cache-sh4.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/cache-sh4.c,v retrieving revision 1.1.1.1.2.4 diff -u -3 -p -r1.1.1.1.2.4 cache-sh4.c --- arch/sh/mm/cache-sh4.c 29 Mar 2002 00:01:08 -0000 1.1.1.1.2.4 +++ arch/sh/mm/cache-sh4.c 3 Apr 2002 02:21:06 -0000 @@ -207,10 +207,51 @@ void flush_cache_sigtramp(unsigned long restore_flags(flags); } +static void flush_cache_4096_all(unsigned long start) +{ +#if defined(CONFIG_CPU_SUBTYPE_SH7751) || defined(CONFIG_CPU_SUBTYPE_ST40STB1) + /* + * SH7751 and ST40 have no restriction to handle cache. + * (While SH7750 must do that at P2 area.) 
+ */ + unsigned long addr; + for (addr = start; addr < start + 4096; addr += 32) + ctrl_outl(0, addr); +#else + register unsigned long __r0 __asm__ ("r0") = 0; + register unsigned long __r1 __asm__ ("r1") = 128; + register unsigned long __r4 __asm__ ("r4"); + register unsigned long __r5 __asm__ ("r5"); + register unsigned long __r6 __asm__ ("r6"); + register unsigned long __r7 __asm__ ("r7"); + extern void __flush_cache_4096_all(unsigned long); + + asm volatile("jsr @%7; nop" + : "=&r" (__r4), "=&r" (__r5), "=&r" (__r6), "=&r" (__r7) + : "0" (start), "r" (__r0), "r" (__r1), + "r" (__flush_cache_4096_all + 0x20000000) + : "pr"); +#endif +} + static inline void flush_cache_4096(unsigned long start, unsigned long phys) { -#if defined(CONFIG_CPU_SUBTYPE_SH7750) +#if defined(CONFIG_CPU_SUBTYPE_SH7751) || defined(CONFIG_CPU_SUBTYPE_ST40STB1) + if (start >= CACHE_OC_ADDRESS_ARRAY) { + /* + * SH7751 and ST40 have no restriction to handle cache. + * (While SH7750 must do that at P2 area.) + */ + unsigned long addr, data; + for (addr = start; addr < start + 4096; addr += 32) { + data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); + if (data == phys) + ctrl_outl(0, addr); + } + } else +#endif + { register unsigned long addr __asm__ ("r4"); register unsigned long data __asm__ ("r0"); register unsigned long __r5 __asm__ ("r5") = phys; @@ -223,18 +264,7 @@ static inline void flush_cache_4096(unsi : "0" (start), "1" (__flush_cache_4096 + 0x20000000), "r" (__r5), "r" (__r6), "r" (__r7) : "pr"); -#else - /* - * SH7751 and ST40 have no restriction to handle cache. - * (While SH7750 must do that at P2 area.) 
- */ - unsigned long addr, data; - for (addr = start; addr < start + 4096; addr += 32) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); } -#endif } /* @@ -261,25 +291,55 @@ void flush_dcache_page(struct page *page } } -void flush_cache_all(void) +static inline void flush_icache_all(void) { - extern unsigned long empty_zero_page[1024]; unsigned long flags; - unsigned long addr; save_and_cli(flags); + jump_to_P2(); + /* Flush I-cache */ + ctrl_outl(CCR_CACHE_VAL|CCR_CACHE_ICI, CCR); + back_to_P1(); + restore_flags(flags); +} + +#undef C_IMPLEMENTATION_OF_CACHE_ALL + +void flush_cache_all(void) +{ + extern unsigned long empty_zero_page[1024]; /* Prefetch the data to write back D-cache */ + +#ifdef C_IMPLEMENTATION_OF_CACHE_ALL + unsigned long addr; + for (addr = (unsigned long)empty_zero_page; addr < (unsigned long)empty_zero_page + 1024*16; addr += L1_CACHE_BYTES) asm volatile("pref @%0"::"r" (addr)); - - jump_to_P2(); - /* Flush D-cache/I-cache */ - ctrl_outl(CCR_CACHE_INIT, CCR); - back_to_P1(); - restore_flags(flags); +#else + unsigned long a0, a1, a2, a3, cnt; + asm volatile( + "mov %0, %1; add #32, %1\n\t" + "mov %0, %2; add #64, %2\n\t" + "mov %1, %3; add #64, %3\n\t" + "1:\n\t" + "pref @%0\n\t" + "dt %4\n\t" + "pref @%1\n\t" + "add %5, %0\n\t" + "pref @%2\n\t" + "add %5, %1\n\t" + "pref @%3\n\t" + "add %5, %2\n\t" + "bf/s 1b\n\t" + " add %5, %3" + : "=&r" (a0), "=&r" (a1), "=&r" (a2), "=&r" (a3), "=&r" (cnt) + : "r" (32*4), "0" (empty_zero_page), "4" (1024*16/32/4) + : "t"); +#endif + flush_icache_all(); } void flush_cache_mm(struct mm_struct *mm) @@ -341,15 +401,51 @@ static void __flush_cache_page(struct vm void flush_cache_range(struct mm_struct *mm, unsigned long start, unsigned long end) { - /* - * We could call flush_cache_page for the pages of these - * range, but it's really time consuming (we have to scan the - * caches all the time...). 
- * - * We can't use A-bit magic, as there's the case we don't have - * valid entry on TLB. - */ - flush_cache_all(); + unsigned long p = start & PAGE_MASK; + pgd_t *dir; + pmd_t *pmd; + pte_t *pte; + pte_t entry; + unsigned long phys; + unsigned long d = 0; + + dir = pgd_offset(mm, p); + pmd = pmd_offset(dir, p); + + do { + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + p &= ~((1 << PMD_SHIFT) -1); + p += (1 << PMD_SHIFT); + pmd++; + continue; + } + pte = pte_offset(pmd, p); + do { + entry = *pte; + if ((pte_val(entry) & _PAGE_PRESENT)) { + phys = pte_val(entry)&PTE_PHYS_MASK; + if ((p^phys) & CACHE_ALIAS) { + d |= 1 << ((p & CACHE_ALIAS)>>12); + d |= 1 << ((phys & CACHE_ALIAS)>>12); + if (d == 0x0f) + goto loop_exit; + } + } + pte++; + p += PAGE_SIZE; + } while (p < end && (unsigned long)pte & PAGE_MASK); + pmd++; + } while (p < end); + loop_exit: + if (d & 1) + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY); + if (d & 2) + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY | 0x1000); + if (d & 4) + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY | 0x2000); + if (d & 8) + flush_cache_4096_all(CACHE_OC_ADDRESS_ARRAY | 0x3000); + flush_icache_all(); } /* Index: arch/sh/mm/clear_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/clear_page.S,v retrieving revision 1.1.1.1.2.1 diff -u -3 -p -r1.1.1.1.2.1 clear_page.S --- arch/sh/mm/clear_page.S 29 Mar 2002 00:01:08 -0000 1.1.1.1.2.1 +++ arch/sh/mm/clear_page.S 3 Apr 2002 02:21:06 -0000 @@ -182,7 +182,6 @@ ENTRY(__clear_user_page) nop .L4096: .word 4096 -#if defined(CONFIG_CPU_SUBTYPE_SH7750) ENTRY(__flush_cache_4096) .rept 128 mov.l @r4,r0 @@ -191,6 +190,34 @@ ENTRY(__flush_cache_4096) bf 1f mov.l r7,@r4 1: add #32,r4 + .endr + nop + nop + nop + nop + nop + nop + nop + rts + nop + +#if defined(CONFIG_CPU_SUBTYPE_SH7750) +ENTRY(__flush_cache_4096_all) + mov r4,r5 + mov r4,r6 + mov r4,r7 + add #32,r5 + add #-64,r6 + add #-32,r7 + .rept 32 + mov.l r0,@r4 + add r1,r6 + 
mov.l r0,@r5 + add r1,r7 + mov.l r0,@r6 + add r1,r4 + mov.l r0,@r7 + add r1,r5 .endr nop nop Index: arch/sh/mm/copy_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/copy_page.S,v retrieving revision 1.1.1.1.2.1 diff -u -3 -p -r1.1.1.1.2.1 copy_page.S --- arch/sh/mm/copy_page.S 29 Mar 2002 00:01:08 -0000 1.1.1.1.2.1 +++ arch/sh/mm/copy_page.S 3 Apr 2002 02:21:06 -0000 @@ -84,7 +84,6 @@ ENTRY(copy_page) * r10 --- to * r11 --- from */ -#include <linux/linkage.h> ENTRY(__copy_user_page) mov.l r8,@-r15 mov.l r9,@-r15 -- |
From: NIIBE Y. <gn...@m1...> - 2002-03-27 23:54:06
|
David McKay wrote: > Niibe-san, > One of the differences between the 7750 and 7751 (and ST40) processors > is that the restriction to flush D-cache from P2 only has been lifted. Excellent! I didn't know that. If I could suggest one more feature, I think it would be great if we had a feature for flushing the cache by physical address (page), so that we don't need to loop over the whole cache in the first place. The change I've made works well for 7750 (in P2). With instruction cache enabled (P1), I think that loop works better. -- |
From: NIIBE Y. <gn...@m1...> - 2002-03-28 02:17:09
|
NIIBE Yutaka wrote: > The change I've made works well for 7750 (in P2). With instruction > cache enabled (P1), I think that loop works better. I mean things like this. 2002-03-28 NIIBE Yutaka <gn...@m1...> * arch/sh/mm/clear_page.S (__flush_cache_4096): Only define for SH7750. * arch/sh/mm/cache-sh4.c (flush_cache_4096): Plain loop for SH7751 and ST40 (no need to go P2). Index: arch/sh/mm/cache-sh4.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/cache-sh4.c,v retrieving revision 1.8 diff -u -3 -p -r1.8 cache-sh4.c --- arch/sh/mm/cache-sh4.c 27 Mar 2002 09:30:48 -0000 1.8 +++ arch/sh/mm/cache-sh4.c 28 Mar 2002 02:14:09 -0000 @@ -211,6 +211,7 @@ void flush_cache_sigtramp(unsigned long static inline void flush_cache_4096(unsigned long start, unsigned long phys) { +#if defined(CONFIG_CPU_SUBTYPE_SH7750) register unsigned long addr __asm__ ("r4"); register unsigned long data __asm__ ("r0"); register unsigned long __r5 __asm__ ("r5") = phys; @@ -223,6 +224,18 @@ static inline void flush_cache_4096(unsi : "0" (start), "1" (__flush_cache_4096 + 0x20000000), "r" (__r5), "r" (__r6), "r" (__r7) : "pr"); +#else + /* + * SH7751 and ST40 have no restriction to handle cache. + * (While SH7750 must do that at P2 area.) 
+ */ + unsigned long addr, data; + for (addr = start; addr < start + 4096; addr += 32) { + data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); + if (data == phys) + ctrl_outl(0, addr); + } +#endif } /* Index: arch/sh/mm/clear_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/clear_page.S,v retrieving revision 1.2 diff -u -3 -p -r1.2 clear_page.S --- arch/sh/mm/clear_page.S 27 Mar 2002 09:30:48 -0000 1.2 +++ arch/sh/mm/clear_page.S 28 Mar 2002 02:14:09 -0000 @@ -180,14 +180,7 @@ ENTRY(__clear_user_page) nop .L4096: .word 4096 -/************* - unsigned long addr, data; - for (addr = start; addr < start + 4096; addr += 32) { - data = ctrl_inl(addr)&(0x1ffff000|CACHE_VALID); - if (data == phys) - ctrl_outl(0, addr); - } -*************/ +#if defined(CONFIG_CPU_SUBTYPE_SH7750) ENTRY(__flush_cache_4096) .rept 128 mov.l @r4,r0 @@ -206,4 +199,5 @@ ENTRY(__flush_cache_4096) nop rts nop +#endif #endif |