From: Andy P. <at...@us...> - 2002-04-09 17:08:27
|
Update of /cvsroot/linux-vax/kernel-2.4/arch/cris/lib In directory usw-pr-cvs1:/tmp/cvs-serv13825/cris/lib Modified Files: Makefile checksum.S checksumcopy.S dmacopy.c memset.c old_checksum.c string.c usercopy.c Added Files: csumcpfruser.S dram_init.S hw_settings.S Log Message: synch 2.4.15 commit 29 --- NEW FILE --- /* * Add-on to transform csum_partial_copy_nocheck in checksumcopy.S into * csum_partial_copy_from_user by adding exception records. * * Copyright (C) 2001 Axis Communications AB. * * Author: Hans-Peter Nilsson. */ #include <asm/errno.h> /* Same function body, but a different name. If we just added exception records to _csum_partial_copy_nocheck and made it generic, we wouldn't know a user fault from a kernel fault and we would have overhead in each kernel caller for the error-pointer argument. unsigned int csum_partial_copy_from_user (const char *src, char *dst, int len, unsigned int sum, int *errptr); Note that the errptr argument is only set if we encounter an error. It is conveniently located on the stack, so the normal function body does not have to handle it. */ #define csum_partial_copy_nocheck csum_partial_copy_from_user /* There are local labels numbered 1, 2 and 3 present to mark the different from-user accesses. */ #include "checksumcopy.S" .section .fixup,"ax" ;; Here from the movem loop; restore stack. 4: movem [$sp+],$r8 ;; r12 is already decremented. Add back chunk_size-2. addq 40-2,$r12 ;; Here from the word loop; r12 is off by 2; add it back. 5: addq 2,$r12 ;; Here from a failing single byte. 6: ;; Signal in *errptr that we had a failing access. moveq -EFAULT,$r9 move.d $r9,[[$sp]] ;; Clear the rest of the destination area using memset. Preserve the ;; checksum for the readable bytes. 
push $srp push $r13 move.d $r11,$r10 clear.d $r11 jsr memset pop $r10 jump [$sp+] .previous .section __ex_table,"a" .dword 1b,4b .dword 2b,5b .dword 3b,6b .previous --- NEW FILE --- /* $Id: dram_init.S,v 1.1 2002/04/09 17:03:16 atp Exp $ * * DRAM/SDRAM initialization - alter with care * This file is intended to be included from other assembler files * * Note: This file may not modify r8 or r9 because they are used to carry * information from the decompressor to the kernel * * Copyright (C) 2000, 2001 Axis Communications AB * * Authors: Mikael Starvik (st...@ax...) * * $Log: dram_init.S,v $ * Revision 1.1 2002/04/09 17:03:16 atp * synch 2.4.15 commit 29 * * Revision 1.10 2001/10/04 12:00:21 martinnn * Added missing underscores. * * Revision 1.9 2001/10/01 14:47:35 bjornw * Added register prefixes and removed underscores * * Revision 1.8 2001/05/15 07:12:45 hp * Copy warning from head.S about r8 and r9 * * Revision 1.7 2001/04/18 12:05:39 bjornw * Fixed comments, and explicitely include config.h to be sure its there * * Revision 1.6 2001/04/10 06:20:16 starvik * Delay should be 200us, not 200ns * * Revision 1.5 2001/04/09 06:01:13 starvik * Added support for 100 MHz SDRAMs * * Revision 1.4 2001/03/26 14:24:01 bjornw * Namechange of some config options * * Revision 1.3 2001/03/23 08:29:41 starvik * Corrected calculation of mrs_data * * Revision 1.2 2001/02/08 15:20:00 starvik * Corrected SDRAM initialization * Should now be included as inline * * Revision 1.1 2001/01/29 13:08:02 starvik * Initial version * This file should be included from all assembler files that needs to * initialize DRAM/SDRAM. * */ /* Just to be certain the config file is included, we include it here * explicitly instead of depending on it being included in the file that * uses this code. */ #include <linux/config.h> ;; WARNING! The registers r8 and r9 are used as parameters carrying ;; information from the decompressor (if the kernel was compressed). ;; They should not be used in the code below. 
#ifndef CONFIG_SVINTO_SIM move.d CONFIG_ETRAX_DEF_R_WAITSTATES, $r0 move.d $r0, [R_WAITSTATES] move.d CONFIG_ETRAX_DEF_R_BUS_CONFIG, $r0 move.d $r0, [R_BUS_CONFIG] #ifndef CONFIG_ETRAX_SDRAM move.d CONFIG_ETRAX_DEF_R_DRAM_CONFIG, $r0 move.d $r0, [R_DRAM_CONFIG] move.d CONFIG_ETRAX_DEF_R_DRAM_TIMING, $r0 move.d $r0, [R_DRAM_TIMING] #else ; Refer to ETRAX 100LX Designers Reference for a description of SDRAM initialization ; Bank configuration move.d CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r0 move.d $r0, [R_SDRAM_CONFIG] ; Calculate value of mrs_data ; CAS latency = 2 && bus_width = 32 => 0x40 ; CAS latency = 3 && bus_width = 32 => 0x60 ; CAS latency = 2 && bus_width = 16 => 0x20 ; CAS latency = 3 && bus_width = 16 => 0x30 move.d 0x40, $r2 ; Assume 32 bits and CAS latency = 2 move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1 move.d $r1, $r3 and.d 0x03, $r1 ; Get CAS latency and.d 0x1000, $r3 ; 50 or 100 MHz? beq _speed_50 nop _speed_100: cmp.d 0x00, $r1 ; CAS latency = 2? beq _bw_check nop or.d 0x20, $r2 ; CAS latency = 3 ba _bw_check nop _speed_50: cmp.d 0x01, $r1 ; CAS latency = 2? beq _bw_check nop or.d 0x20, $r2 ; CAS latency = 3 _bw_check: move.d CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r1 and.d 0x800000, $r1 ; DRAM width is bit 23 bne _set_timing nop lsrq 1, $r2 ; 16 bits. Shift down value. ; Set timing parameters. 
Starts master clock _set_timing: move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1 and.d 0x8000f9ff, $r1 ; Make sure mrs data and command is 0 or.d 0x80000000, $r1 ; Make sure sdram enable bit is set move.d $r1, $r5 or.d 0x0000c000, $r1 ; ref = disable lslq 16, $r2 ; mrs data starts at bit 16 or.d $r2, $r1 move.d $r1, [R_SDRAM_TIMING] ; Wait 200us move.d 10000, $r2 1: bne 1b subq 1, $r2 ; Issue initialization command sequence move.d _sdram_commands_start, $r2 move.d _sdram_commands_end, $r3 1: clear.d $r4 move.b [$r2+], $r4 lslq 9, $r4 ; Command starts at bit 9 or.d $r1, $r4 move.d $r4, [R_SDRAM_TIMING] nop ; Wait five nop cycles between each command nop nop nop nop cmp.d $r2, $r3 bne 1b nop move.d $r5, [R_SDRAM_TIMING] ba _sdram_commands_end nop _sdram_commands_start: .byte 3 ; Precharge .byte 0 ; nop .byte 2 ; refresh .byte 0 ; nop .byte 2 ; refresh .byte 0 ; nop .byte 2 ; refresh .byte 0 ; nop .byte 2 ; refresh .byte 0 ; nop .byte 2 ; refresh .byte 0 ; nop .byte 2 ; refresh .byte 0 ; nop .byte 2 ; refresh .byte 0 ; nop .byte 2 ; refresh .byte 0 ; nop .byte 1 ; mrs .byte 0 ; nop _sdram_commands_end: #endif #endif --- NEW FILE --- /* * $Id: hw_settings.S,v 1.1 2002/04/09 17:03:16 atp Exp $ * * This table is used by some tools to extract hardware parameters. * The table should be included in the kernel and the decompressor. * Don't forget to update the tools if you change this table. * * Copyright (C) 2001 Axis Communications AB * * Authors: Mikael Starvik (st...@ax...) 
*/ #define PA_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PA_DIR << 8) | \ (CONFIG_ETRAX_DEF_R_PORT_PA_DATA)) #define PB_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PB_CONFIG << 16) | \ (CONFIG_ETRAX_DEF_R_PORT_PB_DIR << 8) | \ (CONFIG_ETRAX_DEF_R_PORT_PB_DATA)) .ascii "HW_PARAM_MAGIC" ; Magic number .dword 0xc0004000 ; Kernel start address ; Debug port #ifdef CONFIG_ETRAX_DEBUG_PORT0 .dword 0 #elif defined(CONFIG_ETRAX_DEBUG_PORT1) .dword 1 #elif defined(CONFIG_ETRAX_DEBUG_PORT2) .dword 2 #elif defined(CONFIG_ETRAX_DEBUG_PORT3) .dword 3 #else .dword 4 ; No debug #endif ; SDRAM or EDO DRAM? #ifdef CONFIG_ETRAX_SDRAM .dword 1 #else .dword 0 #endif ; Register values .dword R_WAITSTATES .dword CONFIG_ETRAX_DEF_R_WAITSTATES .dword R_BUS_CONFIG .dword CONFIG_ETRAX_DEF_R_BUS_CONFIG #ifdef CONFIG_ETRAX_SDRAM .dword R_SDRAM_CONFIG .dword CONFIG_ETRAX_DEF_R_SDRAM_CONFIG .dword R_SDRAM_TIMING .dword CONFIG_ETRAX_DEF_R_SDRAM_TIMING #else .dword R_DRAM_CONFIG .dword CONFIG_ETRAX_DEF_R_DRAM_CONFIG .dword R_DRAM_TIMING .dword CONFIG_ETRAX_DEF_R_DRAM_TIMING #endif .dword R_PORT_PA_SET .dword PA_SET_VALUE .dword R_PORT_PB_SET .dword PB_SET_VALUE .dword 0 ; No more register values Index: Makefile =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/Makefile,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- Makefile 25 Feb 2001 23:15:23 -0000 1.1.1.1 +++ Makefile 9 Apr 2002 17:03:16 -0000 1.2 @@ -6,6 +6,6 @@ $(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $< -o $*.o L_TARGET = lib.a -obj-y = checksum.o checksumcopy.o string.o usercopy.o memset.o +obj-y = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o include $(TOPDIR)/Rules.make Index: checksum.S =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/checksum.S,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- checksum.S 25 Feb 
2001 23:15:23 -0000 1.1.1.1 +++ checksum.S 9 Apr 2002 17:03:16 -0000 1.2 @@ -1,113 +1,124 @@ - ;; $Id$ - ;; A fast checksum routine using movem - ;; Copyright (c) 1998 Bjorn Wesen/Axis Communications AB +/* $Id$ + * A fast checksum routine using movem + * Copyright (c) 1998-2001 Axis Communications AB + * + * csum_partial(const unsigned char * buff, int len, unsigned int sum) + */ - ;; csum_partial(const unsigned char * buff, int len, unsigned int sum) - - .globl _csum_partial -_csum_partial: + .globl csum_partial +csum_partial: + ;; r10 - src + ;; r11 - length + ;; r12 - checksum + ;; check for breakeven length between movem and normal word looping versions + ;; we also do _NOT_ want to compute a checksum over more than the + ;; actual length when length < 40 - cmpu.w 80,r11 - bcs no_movem + cmpu.w 80,$r11 + blo _word_loop nop ;; need to save the registers we use below in the movem loop ;; this overhead is why we have a check above for breakeven length + ;; only r0 - r8 have to be saved, the other ones are clobber-able + ;; according to the ABI - subq 9*4,sp - movem r8,[sp] + subq 9*4,$sp + movem $r8,[$sp] ;; do a movem checksum - ;; r10 - src - ;; r11 - length - ;; r12 - checksum - - subq 10*4,r11 ; update length for the first loop + subq 10*4,$r11 ; update length for the first loop -mloop: movem [r10+],r9 ; read 10 longwords +_mloop: movem [$r10+],$r9 ; read 10 longwords ;; perform dword checksumming on the 10 longwords - add.d r0,r12 + add.d $r0,$r12 ax - add.d r1,r12 + add.d $r1,$r12 ax - add.d r2,r12 + add.d $r2,$r12 ax - add.d r3,r12 + add.d $r3,$r12 ax - add.d r4,r12 + add.d $r4,$r12 ax - add.d r5,r12 + add.d $r5,$r12 ax - add.d r6,r12 + add.d $r6,$r12 ax - add.d r7,r12 + add.d $r7,$r12 ax - add.d r8,r12 + add.d $r8,$r12 ax - add.d r9,r12 + add.d $r9,$r12 ;; fold the carry into the checksum, to avoid having to loop the carry ;; back into the top ax - addq 0,r12 + addq 0,$r12 ax ; do it again, since we might have generated a carry - addq 0,r12 + addq 0,$r12 
- subq 10*4,r11 - bge mloop + subq 10*4,$r11 + bge _mloop nop - addq 10*4,r11 ; compensate for last loop underflowing length + addq 10*4,$r11 ; compensate for last loop underflowing length - ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below - - moveq -1,r1 ; put 0xffff in r1, faster than move.d 0xffff,r1 - lsrq 16,r1 - - move.d r12,r0 - lsrq 16,r0 ; r0 = checksum >> 16 - and.d r1,r12 ; checksum = checksum & 0xffff - add.d r0,r12 ; checksum += r0 - move.d r12,r0 ; do the same again, maybe we got a carry last add - lsrq 16,r0 - and.d r1,r12 - add.d r0,r12 - - movem [sp+],r8 ; restore regs - -no_movem: - cmpq 2,r11 - blt no_words + movem [$sp+],$r8 ; restore regs + +_word_loop: + ;; only fold if there is anything to fold. + + cmpq 0,$r12 + beq _no_fold + + ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below. + ;; r9 and r13 can be used as temporaries. + + moveq -1,$r9 ; put 0xffff in r9, faster than move.d 0xffff,r9 + lsrq 16,$r9 + + move.d $r12,$r13 + lsrq 16,$r13 ; r13 = checksum >> 16 + and.d $r9,$r12 ; checksum = checksum & 0xffff + add.d $r13,$r12 ; checksum += r13 + move.d $r12,$r13 ; do the same again, maybe we got a carry last add + lsrq 16,$r13 + and.d $r9,$r12 + add.d $r13,$r12 + +_no_fold: + cmpq 2,$r11 + blt _no_words nop ;; checksum the rest of the words - subq 2,r11 + subq 2,$r11 -wloop: subq 2,r11 - bge wloop - addu.w [r10+],r12 +_wloop: subq 2,$r11 + bge _wloop + addu.w [$r10+],$r12 - addq 2,r11 + addq 2,$r11 -no_words: +_no_words: ;; see if we have one odd byte more - cmpq 1,r11 - beq do_byte + cmpq 1,$r11 + beq _do_byte nop ret - move.d r12, r10 + move.d $r12, $r10 -do_byte: +_do_byte: ;; copy and checksum the last byte - addu.b [r10],r12 + addu.b [$r10],$r12 ret - move.d r12, r10 + move.d $r12, $r10 - \ No newline at end of file Index: checksumcopy.S =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/checksumcopy.S,v retrieving revision 
1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- checksumcopy.S 25 Feb 2001 23:15:23 -0000 1.1.1.1 +++ checksumcopy.S 9 Apr 2002 17:03:16 -0000 1.2 @@ -1,120 +1,132 @@ - ;; $Id$ - ;; A fast checksum+copy routine using movem - ;; Copyright (c) 1998, 2000 Axis Communications AB - ;; - ;; Authors: Bjorn Wesen - ;; - ;; csum_partial_copy_nocheck(const char *src, char *dst, - ;; int len, unsigned int sum) +/* $Id$ + * A fast checksum+copy routine using movem + * Copyright (c) 1998, 2001 Axis Communications AB + * + * Authors: Bjorn Wesen + * + * csum_partial_copy_nocheck(const char *src, char *dst, + * int len, unsigned int sum) + */ - .globl _csum_partial_copy_nocheck -_csum_partial_copy_nocheck: + .globl csum_partial_copy_nocheck +csum_partial_copy_nocheck: + ;; r10 - src + ;; r11 - dst + ;; r12 - length + ;; r13 - checksum + ;; check for breakeven length between movem and normal word looping versions + ;; we also do _NOT_ want to compute a checksum over more than the + ;; actual length when length < 40 - cmpu.w 80,r12 - bcs no_movem + cmpu.w 80, $r12 + blo _word_loop nop ;; need to save the registers we use below in the movem loop ;; this overhead is why we have a check above for breakeven length + ;; only r0 - r8 have to be saved, the other ones are clobber-able + ;; according to the ABI - subq 9*4,sp - movem r8,[sp] + subq 9*4, $sp + movem $r8, [$sp] ;; do a movem copy and checksum - ;; r10 - src - ;; r11 - dst - ;; r12 - length - ;; r13 - checksum - - subq 10*4,r12 ; update length for the first loop + subq 10*4, $r12 ; update length for the first loop -mloop: movem [r10+],r9 ; read 10 longwords - movem r9,[r11+] ; write 10 longwords +_mloop: movem [$r10+],$r9 ; read 10 longwords +1: ;; A failing userspace access will have this as PC. 
+ movem $r9,[$r11+] ; write 10 longwords ;; perform dword checksumming on the 10 longwords - add.d r0,r13 + add.d $r0,$r13 ax - add.d r1,r13 + add.d $r1,$r13 ax - add.d r2,r13 + add.d $r2,$r13 ax - add.d r3,r13 + add.d $r3,$r13 ax - add.d r4,r13 + add.d $r4,$r13 ax - add.d r5,r13 + add.d $r5,$r13 ax - add.d r6,r13 + add.d $r6,$r13 ax - add.d r7,r13 + add.d $r7,$r13 ax - add.d r8,r13 + add.d $r8,$r13 ax - add.d r9,r13 + add.d $r9,$r13 ;; fold the carry into the checksum, to avoid having to loop the carry ;; back into the top ax - addq 0,r13 + addq 0,$r13 + ax ; do it again, since we might have generated a carry + addq 0,$r13 - subq 10*4,r12 - bge mloop + subq 10*4,$r12 + bge _mloop nop - addq 10*4,r12 ; compensate for last loop underflowing length + addq 10*4,$r12 ; compensate for last loop underflowing length + + movem [$sp+],$r8 ; restore regs + +_word_loop: + ;; only fold if there is anything to fold. + + cmpq 0,$r13 + beq _no_fold ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below + ;; r9 can be used as temporary. - moveq -1,r1 ; put 0xffff in r1, faster than move.d 0xffff,r1 - lsrq 16,r1 - - move.d r13,r0 - lsrq 16,r0 ; r0 = checksum >> 16 - and.d r1,r13 ; checksum = checksum & 0xffff - add.d r0,r13 ; checksum += r0 - move.d r13,r0 ; do the same again, maybe we got a carry last add - lsrq 16,r0 - and.d r1,r13 - add.d r0,r13 - - movem [sp+],r8 ; restore regs - -no_movem: - cmpq 2,r12 - blt no_words + move.d $r13,$r9 + lsrq 16,$r9 ; r9 = checksum >> 16 + and.d 0xffff,$r13 ; checksum = checksum & 0xffff + add.d $r9,$r13 ; checksum += r9 + move.d $r13,$r9 ; do the same again, maybe we got a carry last add + lsrq 16,$r9 + and.d 0xffff,$r13 + add.d $r9,$r13 + +_no_fold: + cmpq 2,$r12 + blt _no_words nop ;; copy and checksum the rest of the words - subq 2,r12 + subq 2,$r12 -wloop: move.w [r10+],r9 - addu.w r9,r13 - subq 2,r12 - bge wloop - move.w r9,[r11+] +_wloop: move.w [$r10+],$r9 +2: ;; A failing userspace access will have this as PC. 
+ addu.w $r9,$r13 + subq 2,$r12 + bge _wloop + move.w $r9,[$r11+] - addq 2,r12 + addq 2,$r12 -no_words: +_no_words: ;; see if we have one odd byte more - cmpq 1,r12 - beq do_byte + cmpq 1,$r12 + beq _do_byte nop ret - move.d r13, r10 + move.d $r13, $r10 -do_byte: +_do_byte: ;; copy and checksum the last byte - move.b [r10],r9 - addu.b r9,r13 - move.b r9,[r11] + move.b [$r10],$r9 +3: ;; A failing userspace access will have this as PC. + addu.b $r9,$r13 + move.b $r9,[$r11] ret - move.d r13, r10 - - \ No newline at end of file + move.d $r13, $r10 Index: dmacopy.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/dmacopy.c,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 Index: memset.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/memset.c,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- memset.c 25 Feb 2001 23:15:23 -0000 1.1.1.1 +++ memset.c 9 Apr 2002 17:03:16 -0000 1.2 @@ -27,6 +27,8 @@ /*# */ /*#-------------------------------------------------------------------------*/ +#include <linux/types.h> + /* No, there's no macro saying 12*4, since it is "hard" to get it into the asm in a good way. Thus better to expose the problem everywhere. */ @@ -39,7 +41,7 @@ void *memset(void *pdst, int c, - unsigned int plen) + size_t plen) { /* Ok. Now we want the parameters put in special registers. Make sure the compiler is able to make something useful of this. */ @@ -54,7 +56,12 @@ /* Ugh. This is fragile at best. Check with newer GCC releases, if they compile cascaded "x |= x << 8" sanely! 
*/ - __asm__("movu.b %0,r13\n\tlslq 8,r13\n\tmove.b %0,r13\n\tmove.d r13,%0\n\tlslq 16,r13\n\tor.d r13,%0" + __asm__("movu.b %0,$r13\n\t" + "lslq 8,$r13\n\t" + "move.b %0,$r13\n\t" + "move.d $r13,%0\n\t" + "lslq 16,$r13\n\t" + "or.d $r13,%0" : "=r" (lc) : "0" (lc) : "r13"); { @@ -111,36 +118,36 @@ ;; Save the registers we'll clobber in the movem process ;; on the stack. Don't mention them to gcc, it will only be ;; upset. - subq 11*4,sp - movem r10,[sp] + subq 11*4,$sp + movem $r10,[$sp] - move.d r11,r0 - move.d r11,r1 - move.d r11,r2 - move.d r11,r3 - move.d r11,r4 - move.d r11,r5 - move.d r11,r6 - move.d r11,r7 - move.d r11,r8 - move.d r11,r9 - move.d r11,r10 + move.d $r11,$r0 + move.d $r11,$r1 + move.d $r11,$r2 + move.d $r11,$r3 + move.d $r11,$r4 + move.d $r11,$r5 + move.d $r11,$r6 + move.d $r11,$r7 + move.d $r11,$r8 + move.d $r11,$r9 + move.d $r11,$r10 ;; Now we've got this: ;; r13 - dst ;; r12 - n ;; Update n for the first loop - subq 12*4,r12 + subq 12*4,$r12 0: - subq 12*4,r12 + subq 12*4,$r12 bge 0b - movem r11,[r13+] + movem $r11,[$r13+] - addq 12*4,r12 ;; compensate for last loop underflowing n + addq 12*4,$r12 ;; compensate for last loop underflowing n ;; Restore registers from stack - movem [sp+],r10" + movem [$sp+],$r10" /* Outputs */ : "=r" (dst), "=r" (n) /* Inputs */ : "0" (dst), "1" (n), "r" (lc)); Index: old_checksum.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/old_checksum.c,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 Index: string.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/string.c,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- string.c 25 Feb 2001 23:15:23 -0000 1.1.1.1 +++ string.c 9 Apr 2002 17:03:16 -0000 1.2 @@ -31,9 +31,11 @@ /*# */ /*#-------------------------------------------------------------------------*/ 
+#include <linux/types.h> + void *memcpy(void *pdst, const void *psrc, - unsigned int pn) + size_t pn) { /* Ok. Now we want the parameters put in special registers. Make sure the compiler is able to make something useful of this. @@ -100,8 +102,8 @@ ;; ;; Save the registers we'll use in the movem process ;; on the stack. - subq 11*4,sp - movem r10,[sp] + subq 11*4,$sp + movem $r10,[$sp] ;; Now we've got this: ;; r11 - src @@ -109,17 +111,17 @@ ;; r12 - n ;; Update n for the first loop - subq 44,r12 + subq 44,$r12 0: - movem [r11+],r10 - subq 44,r12 + movem [$r11+],$r10 + subq 44,$r12 bge 0b - movem r10,[r13+] + movem $r10,[$r13+] - addq 44,r12 ;; compensate for last loop underflowing n + addq 44,$r12 ;; compensate for last loop underflowing n ;; Restore registers from stack - movem [sp+],r10" + movem [$sp+],$r10" /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) /* Inputs */ : "0" (dst), "1" (src), "2" (n)); Index: usercopy.c =================================================================== RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/usercopy.c,v retrieving revision 1.1.1.1 retrieving revision 1.2 diff -u -r1.1.1.1 -r1.2 --- usercopy.c 25 Feb 2001 23:15:23 -0000 1.1.1.1 +++ usercopy.c 9 Apr 2002 17:03:16 -0000 1.2 @@ -95,8 +95,8 @@ ;; ;; Save the registers we'll use in the movem process ;; on the stack. - subq 11*4,sp - movem r10,[sp] + subq 11*4,$sp + movem $r10,[$sp] ;; Now we've got this: ;; r11 - src @@ -104,7 +104,7 @@ ;; r12 - n ;; Update n for the first loop - subq 44,r12 + subq 44,$r12 ; Since the noted PC of a faulting instruction in a delay-slot of a taken ; branch, is that of the branch target, we actually point at the from-movem @@ -113,15 +113,15 @@ ; after *that* movem. 
0: - movem [r11+],r10 - subq 44,r12 + movem [$r11+],$r10 + subq 44,$r12 bge 0b - movem r10,[r13+] + movem $r10,[$r13+] 1: - addq 44,r12 ;; compensate for last loop underflowing n + addq 44,$r12 ;; compensate for last loop underflowing n ;; Restore registers from stack - movem [sp+],r10 + movem [$sp+],$r10 2: .section .fixup,\"ax\" @@ -130,14 +130,14 @@ ; performance penalty for sany use; the program will segfault soon enough. 3: - move.d [sp],r10 - addq 44,r10 - move.d r10,[sp] + move.d [$sp],$r10 + addq 44,$r10 + move.d $r10,[$sp] jump 0b 4: - movem [sp+],r10 - addq 44,r10 - addq 44,r12 + movem [$sp+],$r10 + addq 44,$r10 + addq 44,$r12 jump 2b .previous @@ -255,8 +255,8 @@ ;; ;; Save the registers we'll use in the movem process ;; on the stack. - subq 11*4,sp - movem r10,[sp] + subq 11*4,$sp + movem $r10,[$sp] ;; Now we've got this: ;; r11 - src @@ -264,42 +264,82 @@ ;; r12 - n ;; Update n for the first loop - subq 44,r12 + subq 44,$r12 0: - movem [r11+],r10 + movem [$r11+],$r10 1: - subq 44,r12 + subq 44,$r12 bge 0b - movem r10,[r13+] - - addq 44,r12 ;; compensate for last loop underflowing n + movem $r10,[$r13+] + addq 44,$r12 ;; compensate for last loop underflowing n +8: ;; Restore registers from stack - movem [sp+],r10 + movem [$sp+],$r10 .section .fixup,\"ax\" -; To provide a correct count in r10 of bytes that failed to be copied, -; we jump back into the loop if the loop-branch was taken. -; There is no performance penalty; the program will segfault soon -; enough. +;; Do not jump back into the loop if we fail. For some uses, we get a +;; page fault but for performance reasons we care to not get further +;; faults. For example, fs/super.c at one time did +;; i = size - copy_from_user((void *)page, data, size); +;; which would cause repeated faults while clearing the remainder of +;; the SIZE bytes at PAGE after the first fault. 
3: - move.d [sp],r10 - addq 44,r10 - move.d r10,[sp] - clear.d r0 - clear.d r1 - clear.d r2 - clear.d r3 - clear.d r4 - clear.d r5 - clear.d r6 - clear.d r7 - clear.d r8 - clear.d r9 - clear.d r10 - jump 1b + move.d [$sp],$r10 + +;; Number of remaining bytes, cleared but not copied, is r12 + 44. + + add.d $r12,$r10 + addq 44,$r10 + + move.d $r10,[$sp] + clear.d $r0 + clear.d $r1 + clear.d $r2 + clear.d $r3 + clear.d $r4 + clear.d $r5 + clear.d $r6 + clear.d $r7 + clear.d $r8 + clear.d $r9 + clear.d $r10 + +;; Perform clear similar to the copy-loop. + +4: + subq 44,$r12 + bge 4b + movem $r10,[$r13+] + +;; Clear by four for the remaining multiples. + + addq 40,$r12 + bmi 6f + nop +5: + subq 4,$r12 + bpl 5b + clear.d [$r13+] +6: + addq 4,$r12 + beq 7f + nop + + subq 1,$r12 + beq 7f + clear.b [$r13+] + + subq 1,$r12 + beq 7f + clear.b [$r13+] + + clear.d $r12 + clear.b [$r13+] +7: + jump 8b .previous .section __ex_table,\"a\" @@ -411,50 +451,50 @@ ;; Save the registers we'll clobber in the movem process ;; on the stack. Don't mention them to gcc, it will only be ;; upset. 
- subq 11*4,sp - movem r10,[sp] + subq 11*4,$sp + movem $r10,[$sp] - clear.d r0 - clear.d r1 - clear.d r2 - clear.d r3 - clear.d r4 - clear.d r5 - clear.d r6 - clear.d r7 - clear.d r8 - clear.d r9 - clear.d r10 - clear.d r11 + clear.d $r0 + clear.d $r1 + clear.d $r2 + clear.d $r3 + clear.d $r4 + clear.d $r5 + clear.d $r6 + clear.d $r7 + clear.d $r8 + clear.d $r9 + clear.d $r10 + clear.d $r11 ;; Now we've got this: ;; r13 - dst ;; r12 - n ;; Update n for the first loop - subq 12*4,r12 + subq 12*4,$r12 0: - subq 12*4,r12 + subq 12*4,$r12 bge 0b - movem r11,[r13+] + movem $r11,[$r13+] 1: - addq 12*4,r12 ;; compensate for last loop underflowing n + addq 12*4,$r12 ;; compensate for last loop underflowing n ;; Restore registers from stack - movem [sp+],r10 + movem [$sp+],$r10 2: .section .fixup,\"ax\" 3: - move.d [sp],r10 - addq 12*4,r10 - move.d r10,[sp] - clear.d r10 + move.d [$sp],$r10 + addq 12*4,$r10 + move.d $r10,[$sp] + clear.d $r10 jump 0b 4: - movem [sp+],r10 - addq 12*4,r10 - addq 12*4,r12 + movem [$sp+],$r10 + addq 12*4,$r10 + addq 12*4,$r12 jump 2b .previous |