From: Andy P. <at...@us...> - 2002-04-10 18:37:43
|
Update of /cvsroot/linux-vax/kernel-2.4/arch/i386/lib In directory usw-pr-cvs1:/tmp/cvs-serv10929/i386/lib Modified Files: Makefile delay.c mmx.c usercopy.c Added Files: strstr.c Removed Files: putuser.S Log Message: synch 2.4.15 commit 35 --- NEW FILE --- #include <linux/string.h> char * strstr(const char * cs,const char * ct) { int d0, d1; register char * __res; __asm__ __volatile__( "movl %6,%%edi\n\t" "repne\n\t" "scasb\n\t" "notl %%ecx\n\t" "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ "movl %%ecx,%%edx\n" "1:\tmovl %6,%%edi\n\t" "movl %%esi,%%eax\n\t" "movl %%edx,%%ecx\n\t" "repe\n\t" "cmpsb\n\t" "je 2f\n\t" /* also works for empty string, see above */ "xchgl %%eax,%%esi\n\t" "incl %%esi\n\t" "cmpb $0,-1(%%eax)\n\t" "jne 1b\n\t" "xorl %%eax,%%eax\n\t" "2:" :"=a" (__res), "=&c" (d0), "=&S" (d1) :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) :"dx", "di"); return __res; } Index: Makefile =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/i386/lib/Makefile,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- Makefile 14 Jan 2001 19:20:53 -0000 1.1.1.1 +++ Makefile 10 Apr 2002 14:23:21 -0000 1.2 @@ -8,10 +8,11 @@ L_TARGET = lib.a obj-y = checksum.o old-checksum.o delay.o \ - usercopy.o getuser.o putuser.o iodebug.o \ - memcpy.o + usercopy.o getuser.o \ + memcpy.o strstr.o obj-$(CONFIG_X86_USE_3DNOW) += mmx.o obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +obj-$(CONFIG_DEBUG_IOVIRT) += iodebug.o include $(TOPDIR)/Rules.make Index: delay.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/i386/lib/delay.c,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- delay.c 14 Jan 2001 19:20:53 -0000 1.1.1.1 +++ delay.c 10 Apr 2002 14:23:21 -0000 1.2 @@ -13,6 +13,7 @@ #include <linux/config.h> #include <linux/sched.h> #include <linux/delay.h> +#include <asm/processor.h> #include <asm/delay.h> #ifdef CONFIG_SMP @@ -34,9 +35,9 @@ rdtscl(bclock); do { + rep_nop(); rdtscl(now); - } - while((now-bclock) < loops); + } while ((now-bclock) < loops); } /* @@ -58,7 +59,7 @@ void __delay(unsigned long loops) { - if(x86_udelay_tsc) + if (x86_udelay_tsc) __rdtsc_delay(loops); else __loop_delay(loops); Index: mmx.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/i386/lib/mmx.c,v retrieving revision 1.1.1.2 retrieving revision 1.2 diff -u -r1.1.1.2 -r1.2 --- mmx.c 25 Feb 2001 23:15:16 -0000 1.1.1.2 +++ mmx.c 10 Apr 2002 14:23:21 -0000 1.2 @@ -1,8 +1,11 @@ +#include <linux/config.h> #include <linux/types.h> #include <linux/string.h> #include <linux/sched.h> #include <asm/i387.h> +#include <asm/hardirq.h> + /* * MMX 3DNow! library helper functions @@ -25,8 +28,14 @@ void *_mmx_memcpy(void *to, const void *from, size_t len) { - void *p=to; - int i= len >> 6; /* len/64 */ + void *p; + int i; + + if (in_interrupt()) + return __memcpy(to, from, len); + + p = to; + i = len >> 6; /* len/64 */ kernel_fpu_begin(); @@ -88,6 +97,13 @@ return p; } +#ifdef CONFIG_MK7 + +/* + * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and + * other MMX using processors do not. + */ + static void fast_clear_page(void *page) { int i; @@ -147,7 +163,7 @@ ".previous" : : "r" (from) ); - for(i=0; i<4096/64; i++) + for(i=0; i<(4096-320)/64; i++) { __asm__ __volatile__ ( "1: prefetch 320(%0)\n" @@ -179,6 +195,29 @@ from+=64; to+=64; } + for(i=(4096-320)/64; i<4096/64; i++) + { + __asm__ __volatile__ ( + "2: movq (%0), %%mm0\n" + " movntq %%mm0, (%1)\n" + " movq 8(%0), %%mm1\n" + " movntq %%mm1, 8(%1)\n" + " movq 16(%0), %%mm2\n" + " movntq %%mm2, 16(%1)\n" + " movq 24(%0), %%mm3\n" + " movntq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm4\n" + " movntq %%mm4, 32(%1)\n" + " movq 40(%0), %%mm5\n" + " movntq %%mm5, 40(%1)\n" + " movq 48(%0), %%mm6\n" + " movntq %%mm6, 48(%1)\n" + " movq 56(%0), %%mm7\n" + " movntq %%mm7, 56(%1)\n" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } /* since movntq is weakly-ordered, a "sfence" is needed to become * ordered again. */ @@ -187,6 +226,110 @@ ); kernel_fpu_end(); } + +#else + +/* + * Generic MMX implementation without K7 specific streaming + */ + +static void fast_clear_page(void *page) +{ + int i; + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + " pxor %%mm0, %%mm0\n" : : + ); + + for(i=0;i<4096/128;i++) + { + __asm__ __volatile__ ( + " movq %%mm0, (%0)\n" + " movq %%mm0, 8(%0)\n" + " movq %%mm0, 16(%0)\n" + " movq %%mm0, 24(%0)\n" + " movq %%mm0, 32(%0)\n" + " movq %%mm0, 40(%0)\n" + " movq %%mm0, 48(%0)\n" + " movq %%mm0, 56(%0)\n" + " movq %%mm0, 64(%0)\n" + " movq %%mm0, 72(%0)\n" + " movq %%mm0, 80(%0)\n" + " movq %%mm0, 88(%0)\n" + " movq %%mm0, 96(%0)\n" + " movq %%mm0, 104(%0)\n" + " movq %%mm0, 112(%0)\n" + " movq %%mm0, 120(%0)\n" + : : "r" (page) : "memory"); + page+=128; + } + + kernel_fpu_end(); +} + +static void fast_copy_page(void *to, void *from) +{ + int i; + + + kernel_fpu_begin(); + + __asm__ __volatile__ ( + "1: prefetch (%0)\n" + " prefetch 64(%0)\n" + " prefetch 128(%0)\n" + " prefetch 192(%0)\n" + " prefetch 256(%0)\n" + "2: \n" + ".section .fixup, \"ax\"\n" + "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from) ); + + for(i=0; i<4096/64; i++) + { + __asm__ __volatile__ ( + "1: prefetch 320(%0)\n" + "2: movq (%0), %%mm0\n" + " movq 8(%0), %%mm1\n" + " movq 16(%0), %%mm2\n" + " movq 24(%0), %%mm3\n" + " movq %%mm0, (%1)\n" + " movq %%mm1, 8(%1)\n" + " movq %%mm2, 16(%1)\n" + " movq %%mm3, 24(%1)\n" + " movq 32(%0), %%mm0\n" + " movq 40(%0), %%mm1\n" + " movq 48(%0), %%mm2\n" + " movq 56(%0), %%mm3\n" + " movq %%mm0, 32(%1)\n" + " movq %%mm1, 40(%1)\n" + " movq %%mm2, 48(%1)\n" + " movq %%mm3, 56(%1)\n" + ".section .fixup, \"ax\"\n" + "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + " jmp 2b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 1b, 3b\n" + ".previous" + : : "r" (from), "r" (to) : "memory"); + from+=64; + to+=64; + } + kernel_fpu_end(); +} + + +#endif /* * Favour MMX for page clear and copy. Index: usercopy.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/i386/lib/usercopy.c,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- usercopy.c 14 Jan 2001 19:20:54 -0000 1.1.1.1 +++ usercopy.c 10 Apr 2002 14:23:21 -0000 1.2 @@ -34,6 +34,8 @@ else mmx_copy_user_zeroing(to, from, n); } + else + memset(to, 0, n); return n; } @@ -42,6 +44,7 @@ unsigned long __generic_copy_to_user(void *to, const void *from, unsigned long n) { + prefetch(from); if (access_ok(VERIFY_WRITE, to, n)) __copy_user(to,from,n); return n; @@ -50,8 +53,11 @@ unsigned long __generic_copy_from_user(void *to, const void *from, unsigned long n) { + prefetchw(to); if (access_ok(VERIFY_READ, from, n)) __copy_user_zeroing(to,from,n); + else + memset(to, 0, n); return n; } @@ -159,6 +165,8 @@ unsigned long res, tmp; __asm__ __volatile__( + " testl %0, %0\n" + " jz 3f\n" " andl %0,%%ecx\n" "0: repne; scasb\n" " setne %%al\n" @@ -167,6 +175,8 @@ "1:\n" ".section .fixup,\"ax\"\n" "2: xorl %%eax,%%eax\n" + " jmp 1b\n" + "3: movb $1,%%al\n" " jmp 1b\n" ".previous\n" ".section __ex_table,\"a\"\n" --- putuser.S DELETED --- |