From: NIIBE Y. <gn...@m1...> - 2002-03-29 02:04:47
|
This fixes the flush_cache_range performance issue. Now, its performance is comparable to the implementation where flush_cache_range = flush_cache_all (as measured by a process spawn test). (That is, it is two times faster.) Besides, flush_cache_all has been improved, so that it doesn't hold interrupts off. 2002-03-29 NIIBE Yutaka <gn...@m1...> * arch/sh/mm/cache-sh4.c (flush_cache_4096_nocheck): New function. (flush_icache_all): New function. (flush_cache_all): Use flush_icache_all. Don't need to protect from interrupt as we don't flush dcache by writing CCR. (flush_cache_range): Don't call __flush_cache_page, but call flush_cache_4096_nocheck. * arch/sh/mm/clear_page.S (__flush_cache_4096_nocheck): New function. Index: arch/sh/mm/cache-sh4.c =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/cache-sh4.c,v retrieving revision 1.9 diff -u -3 -p -r1.9 cache-sh4.c --- arch/sh/mm/cache-sh4.c 29 Mar 2002 00:02:04 -0000 1.9 +++ arch/sh/mm/cache-sh4.c 29 Mar 2002 01:58:44 -0000 @@ -208,6 +208,33 @@ void flush_cache_sigtramp(unsigned long restore_flags(flags); } +static void flush_cache_4096_nocheck(unsigned long start) +{ +#if defined(CONFIG_CPU_SUBTYPE_SH7750) + register unsigned long __r0 __asm__ ("r0") = 0; + register unsigned long __r1 __asm__ ("r1") = 128; + register unsigned long __r4 __asm__ ("r4"); + register unsigned long __r5 __asm__ ("r5"); + register unsigned long __r6 __asm__ ("r6"); + register unsigned long __r7 __asm__ ("r7"); + extern void __flush_cache_4096_nocheck(unsigned long); + + asm volatile("jsr @%7; nop" + : "=&r" (__r4), "=&r" (__r5), "=&r" (__r6), "=&r" (__r7) + : "0" (start), "r" (__r0), "r" (__r1), + "r" (__flush_cache_4096_nocheck + 0x20000000) + : "pr"); +#else + /* + * SH7751 and ST40 have no restriction to handle cache. + * (While SH7750 must do that at P2 area.) 
+ */ + unsigned long addr; + for (addr = start; addr < start + 4096; addr += 32) + ctrl_outl(0, addr); +#endif +} + static inline void flush_cache_4096(unsigned long start, unsigned long phys) { @@ -262,25 +289,54 @@ void flush_dcache_page(struct page *page } } -void flush_cache_all(void) +static inline void flush_icache_all(void) { - extern unsigned long empty_zero_page[1024]; unsigned long flags; - unsigned long addr; save_and_cli(flags); + jump_to_P2(); + /* Flush I-cache */ + ctrl_outl(CCR_CACHE_VAL|CCR_CACHE_ICI, CCR); + back_to_P1(); + restore_flags(flags); +} + +void flush_cache_all(void) +{ + extern unsigned long empty_zero_page[1024]; /* Prefetch the data to write back D-cache */ + +#if C_IMPLEMENTATION + unsigned long addr; + for (addr = (unsigned long)empty_zero_page; addr < (unsigned long)empty_zero_page + 1024*16; addr += L1_CACHE_BYTES) asm volatile("pref @%0"::"r" (addr)); - - jump_to_P2(); - /* Flush D-cache/I-cache */ - ctrl_outl(CCR_CACHE_INIT, CCR); - back_to_P1(); - restore_flags(flags); +#else + unsigned long a0, a1, a2, a3, cnt; + asm volatile( + "mov %0, %1; add #32, %1\n\t" + "mov %0, %2; add #64, %2\n\t" + "mov %1, %3; add #64, %3\n\t" + "1:\n\t" + "pref @%0\n\t" + "dt %4\n\t" + "pref @%1\n\t" + "add %5, %0\n\t" + "pref @%2\n\t" + "add %5, %1\n\t" + "pref @%3\n\t" + "add %5, %2\n\t" + "bf/s 1b\n\t" + " add %5, %3" + : "=&r" (a0), "=&r" (a1), "=&r" (a2), "=&r" (a3), "=&r" (cnt) + : "r" (32*4), "0" (empty_zero_page), "4" (1024*16/32/4) + : "t"); + } +#endif + flush_icache_all(); } void flush_cache_mm(struct mm_struct *mm) @@ -348,6 +404,7 @@ void flush_cache_range(struct vm_area_st pte_t *pte; pte_t entry; unsigned long phys; + unsigned long d = 0; dir = pgd_offset(vma->vm_mm, p); pmd = pmd_offset(dir, p); @@ -364,13 +421,29 @@ void flush_cache_range(struct vm_area_st entry = *pte; if ((pte_val(entry) & _PAGE_PRESENT)) { phys = pte_val(entry)&PTE_PHYS_MASK; - __flush_cache_page(vma, p, phys); + if ((p^phys) & CACHE_ALIAS) { + d |= 1 << ((p & 
CACHE_ALIAS)>>12); + d |= 1 << ((phys & CACHE_ALIAS)>>12); + if (d == 0x0f) + goto loop_exit; + } } pte++; p += PAGE_SIZE; } while (p < end && (unsigned long)pte & PAGE_MASK); pmd++; } while (p < end); + loop_exit: + if (d & 1) + flush_cache_4096_nocheck(CACHE_OC_ADDRESS_ARRAY); + if (d & 2) + flush_cache_4096_nocheck(CACHE_OC_ADDRESS_ARRAY | 0x1000); + if (d & 4) + flush_cache_4096_nocheck(CACHE_OC_ADDRESS_ARRAY | 0x2000); + if (d & 8) + flush_cache_4096_nocheck(CACHE_OC_ADDRESS_ARRAY | 0x3000); + if (vma->vm_flags & VM_EXEC) + flush_icache_all(); } /* Index: arch/sh/mm/clear_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/clear_page.S,v retrieving revision 1.3 diff -u -3 -p -r1.3 clear_page.S --- arch/sh/mm/clear_page.S 29 Mar 2002 00:02:04 -0000 1.3 +++ arch/sh/mm/clear_page.S 29 Mar 2002 01:58:44 -0000 @@ -201,5 +201,32 @@ ENTRY(__flush_cache_4096) nop rts nop + +ENTRY(__flush_cache_4096_nocheck) + mov r4,r5 + mov r4,r6 + mov r4,r7 + add #32,r5 + add #-64,r6 + add #-32,r7 + .rept 32 + mov.l r0,@r4 + add r1,r6 + mov.l r0,@r5 + add r1,r7 + mov.l r0,@r6 + add r1,r4 + mov.l r0,@r7 + add r1,r5 + .endr + nop + nop + nop + nop + nop + nop + nop + rts + nop #endif #endif |