From: Paul M. <le...@us...> - 2002-03-10 08:26:39
|
Update of /cvsroot/linux-mips/linux/arch/mips/mm In directory usw-pr-cvs1:/tmp/cvs-serv1115/arch/mips/mm Modified Files: pg-vr4131.c Log Message: Implemented r4k_copy_page_d32() and r4k_clear_page_d32(). Not all implementations of the Vr4131 have a 16-byte d-cache line size. Index: pg-vr4131.c =================================================================== RCS file: /cvsroot/linux-mips/linux/arch/mips/mm/pg-vr4131.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- pg-vr4131.c 18 Jan 2002 21:22:17 -0000 1.1 +++ pg-vr4131.c 10 Mar 2002 08:26:36 -0000 1.2 @@ -16,18 +16,6 @@ #include <asm/cacheops.h> -/* - * Zero an entire page on Vr4131 processor. Basically a simple unrolled loop - * should do the job but we want more performance by saving memory bus - * bandwidth. We have five flavours of the routine available for: - * - * - 16byte cachelines and no second level cache - * - 32byte cachelines second level cache - * - a version which handles the buggy R4600 v1.x - * - a version which handles the buggy R4600 v2.0 - * - Finally a last version without fancy cache games for the SC and MC - * versions of R4000 and R4400. 
- */ void r4k_clear_page_d16(void *page) { unsigned long reg1; @@ -78,7 +66,6 @@ :"$1", "memory"); } - void r4k_copy_page_d16(void *to, void *from) { unsigned long dummy1, dummy2; @@ -140,6 +127,107 @@ void r4k_clear_page_d32(void *page) { + unsigned long reg1; + + __asm__ __volatile__( + ".set\tnoreorder\n\t" + ".set\tnoat\n\t" + ".set\tmips3\n\t" + "mfc0 %1,$16\n\t" + "nop\n\t" + "mtc0\t$0,$28\n\t" + "mtc0\t$0,$29\n\t" + "nop\n\t" + "daddiu\t$1,%0,%3\n" + "1:\tcache\t0x09,(%0)\n\t" + "cache\t%4,(%0)\n\t" + "sd\t$0,(%0)\n\t" + "sd\t$0,8(%0)\n\t" + "cache\t0x09,(%0)\n\t" + "cache\t0x09,16(%0)\n\t" + "sd\t$0,16(%0)\n\t" + "sd\t$0,24(%0)\n\t" + "daddiu\t%0,64\n\t" + "cache\t0x09,16(%0)\n\t" + "cache\t0x09,-32(%0)\n\t" + "cache\t%4,-32(%0)\n\t" + "sd\t$0,-32(%0)\n\t" + "sd\t$0,-24(%0)\n\t" + "cache\t0x09,-32(%0)\n\t" + "cache\t0x09,-16(%0)\n\t" + "sd\t$0,-16(%0)\n\t" + "sd\t$0,-8(%0)\n\t" + "cache\t0x09,-16(%0)\n\t" + "nop\n\t" + "bne\t$1,%0,1b\n\t" + "nop\n\t" + "mtc0 %1,$16\n\t" + "nop\n\t" + ".set\tmips0\n\t" + ".set\tat\n\t" + ".set\treorder" + :"=r" (page), "=&r" (reg1) + :"0" (page), + "I" (PAGE_SIZE), + "i" (Index_Writeback_Inv_D) + :"$1", "memory"); +} + +void r4k_copy_page_d32(void *to, void *from) +{ + unsigned long dummy1, dummy2; + unsigned long reg1, reg2, reg3, reg4; + + __asm__ __volatile__( + ".set\tnoreorder\n\t" + ".set\tnoat\n\t" + ".set\tmips3\n\t" + "daddiu\t$1,%0,%8\n" + "1:\tcache\t%9,(%0)\n\t" + "lw\t%2,(%1)\n\t" + "lw\t%3,4(%1)\n\t" + "lw\t%4,8(%1)\n\t" + "lw\t%5,12(%1)\n\t" + "sw\t%2,(%0)\n\t" + "sw\t%3,4(%0)\n\t" + "sw\t%4,8(%0)\n\t" + "sw\t%5,12(%0)\n\t" + "lw\t%2,16(%1)\n\t" + "lw\t%3,20(%1)\n\t" + "lw\t%4,24(%1)\n\t" + "lw\t%5,28(%1)\n\t" + "sw\t%2,16(%0)\n\t" + "sw\t%3,20(%0)\n\t" + "sw\t%4,24(%0)\n\t" + "sw\t%5,28(%0)\n\t" + "cache\t%9,32(%0)\n\t" + "daddiu\t%0,64\n\t" + "daddiu\t%1,64\n\t" + "lw\t%2,-32(%1)\n\t" + "lw\t%3,-28(%1)\n\t" + "lw\t%4,-24(%1)\n\t" + "lw\t%5,-20(%1)\n\t" + "sw\t%2,-32(%0)\n\t" + "sw\t%3,-28(%0)\n\t" + 
"sw\t%4,-24(%0)\n\t" + "sw\t%5,-20(%0)\n\t" + "lw\t%2,-16(%1)\n\t" + "lw\t%3,-12(%1)\n\t" + "lw\t%4,-8(%1)\n\t" + "lw\t%5,-4(%1)\n\t" + "sw\t%2,-16(%0)\n\t" + "sw\t%3,-12(%0)\n\t" + "sw\t%4,-8(%0)\n\t" + "bne\t$1,%0,1b\n\t" + "sw\t%5,-4(%0)\n\t" + ".set\tmips0\n\t" + ".set\tat\n\t" + ".set\treorder" + :"=r" (dummy1), "=r" (dummy2), + "=&r" (reg1), "=&r" (reg2), "=&r" (reg3), "=&r" (reg4) + :"0" (to), "1" (from), + "I" (PAGE_SIZE), + "i" (Index_Writeback_Inv_D)); } void r4k_clear_page_r4600_v1(void *page) @@ -163,10 +251,6 @@ } void r4k_clear_page_s128(void *page) -{ -} - -void r4k_copy_page_d32(void *to, void *from) { } |