|
From: Andy P. <at...@us...> - 2002-04-09 17:08:27
|
Update of /cvsroot/linux-vax/kernel-2.4/arch/cris/lib
In directory usw-pr-cvs1:/tmp/cvs-serv13825/cris/lib
Modified Files:
Makefile checksum.S checksumcopy.S dmacopy.c memset.c
old_checksum.c string.c usercopy.c
Added Files:
csumcpfruser.S dram_init.S hw_settings.S
Log Message:
synch 2.4.15 commit 29
--- NEW FILE ---
/*
* Add-on to transform csum_partial_copy_nocheck in checksumcopy.S into
* csum_partial_copy_from_user by adding exception records.
*
* Copyright (C) 2001 Axis Communications AB.
*
* Author: Hans-Peter Nilsson.
*/
#include <asm/errno.h>
/* Same function body, but a different name. If we just added exception
records to _csum_partial_copy_nocheck and made it generic, we wouldn't
know a user fault from a kernel fault and we would have overhead in
each kernel caller for the error-pointer argument.
unsigned int csum_partial_copy_from_user
(const char *src, char *dst, int len, unsigned int sum, int *errptr);
Note that the errptr argument is only set if we encounter an error.
It is conveniently located on the stack, so the normal function body
does not have to handle it. */
#define csum_partial_copy_nocheck csum_partial_copy_from_user
/* There are local labels numbered 1, 2 and 3 present to mark the
different from-user accesses. */
#include "checksumcopy.S"
.section .fixup,"ax"
;; Fixup handlers, reached through the __ex_table entries at the end of
;; this file when one of the from-user loads marked 1, 2 or 3 in the
;; included function body faults. Entry points 4, 5 and 6 fall through
;; into each other, so each one only undoes the bookkeeping that is
;; specific to the loop it was entered from.
;; Register roles inherited from the function body:
;; r11 - dst, r12 - remaining length, r13 - checksum.
;; Here from the movem loop; restore stack.
4:
movem [$sp+],$r8
;; r12 is already decremented. Add back chunk_size-2.
addq 40-2,$r12
;; Here from the word loop; r12 is off by 2; add it back.
5:
addq 2,$r12
;; Here from a failing single byte.
6:
;; Signal in *errptr that we had a failing access.
;; errptr is the word at the top of the stack at this point, hence the
;; double indirection in the store.
moveq -EFAULT,$r9
move.d $r9,[[$sp]]
;; Clear the rest of the destination area using memset. Preserve the
;; checksum for the readable bytes.
;; srp (return address) and r13 (checksum) are pushed before the call.
;; memset gets dst in r10, the fill value 0 in r11 and the remaining
;; length, which is already in r12.
push $srp
push $r13
move.d $r11,$r10
clear.d $r11
jsr memset
;; Pop the saved checksum into r10 as the return value, then return to
;; the caller by popping the saved srp straight into the PC.
pop $r10
jump [$sp+]
.previous
;; Exception table: each entry pairs the local label that follows a
;; from-user load (1, 2, 3) with the fixup entry point above (4, 5, 6).
.section __ex_table,"a"
.dword 1b,4b
.dword 2b,5b
.dword 3b,6b
.previous
--- NEW FILE ---
/* $Id: dram_init.S,v 1.1 2002/04/09 17:03:16 atp Exp $
*
* DRAM/SDRAM initialization - alter with care
* This file is intended to be included from other assembler files
*
* Note: This file may not modify r8 or r9 because they are used to carry
* information from the decompressor to the kernel
*
* Copyright (C) 2000, 2001 Axis Communications AB
*
* Authors: Mikael Starvik (st...@ax...)
*
* $Log: dram_init.S,v $
* Revision 1.1 2002/04/09 17:03:16 atp
* synch 2.4.15 commit 29
*
* Revision 1.10 2001/10/04 12:00:21 martinnn
* Added missing underscores.
*
* Revision 1.9 2001/10/01 14:47:35 bjornw
* Added register prefixes and removed underscores
*
* Revision 1.8 2001/05/15 07:12:45 hp
* Copy warning from head.S about r8 and r9
*
* Revision 1.7 2001/04/18 12:05:39 bjornw
* Fixed comments, and explicitely include config.h to be sure its there
*
* Revision 1.6 2001/04/10 06:20:16 starvik
* Delay should be 200us, not 200ns
*
* Revision 1.5 2001/04/09 06:01:13 starvik
* Added support for 100 MHz SDRAMs
*
* Revision 1.4 2001/03/26 14:24:01 bjornw
* Namechange of some config options
*
* Revision 1.3 2001/03/23 08:29:41 starvik
* Corrected calculation of mrs_data
*
* Revision 1.2 2001/02/08 15:20:00 starvik
* Corrected SDRAM initialization
* Should now be included as inline
*
* Revision 1.1 2001/01/29 13:08:02 starvik
* Initial version
* This file should be included from all assembler files that needs to
* initialize DRAM/SDRAM.
*
*/
/* Just to be certain the config file is included, we include it here
* explicitly instead of depending on it being included in the file that
* uses this code.
*/
#include <linux/config.h>
;; WARNING! The registers r8 and r9 are used as parameters carrying
;; information from the decompressor (if the kernel was compressed).
;; They should not be used in the code below.
;;
;; Inline DRAM/SDRAM controller setup. The sequence has no entry label
;; and no return; it runs in place where this file is included and
;; falls out at the bottom. Scratch registers used: r0 - r5.
;; Nothing at all is emitted when CONFIG_SVINTO_SIM is defined.
#ifndef CONFIG_SVINTO_SIM
; Waitstate and bus configuration are written for both memory types.
move.d CONFIG_ETRAX_DEF_R_WAITSTATES, $r0
move.d $r0, [R_WAITSTATES]
move.d CONFIG_ETRAX_DEF_R_BUS_CONFIG, $r0
move.d $r0, [R_BUS_CONFIG]
#ifndef CONFIG_ETRAX_SDRAM
; Non-SDRAM case: two plain register writes are all that is done.
move.d CONFIG_ETRAX_DEF_R_DRAM_CONFIG, $r0
move.d $r0, [R_DRAM_CONFIG]
move.d CONFIG_ETRAX_DEF_R_DRAM_TIMING, $r0
move.d $r0, [R_DRAM_TIMING]
#else
; Refer to ETRAX 100LX Designers Reference for a description of SDRAM initialization
; Bank configuration
move.d CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r0
move.d $r0, [R_SDRAM_CONFIG]
; Calculate value of mrs_data
; CAS latency = 2 && bus_width = 32 => 0x40
; CAS latency = 3 && bus_width = 32 => 0x60
; CAS latency = 2 && bus_width = 16 => 0x20
; CAS latency = 3 && bus_width = 16 => 0x30
; r2 accumulates mrs_data; r1 holds the CAS field and r3 the speed bit,
; both extracted from the configured R_SDRAM_TIMING value.
move.d 0x40, $r2 ; Assume 32 bits and CAS latency = 2
move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
move.d $r1, $r3
and.d 0x03, $r1 ; Get CAS latency
and.d 0x1000, $r3 ; 50 or 100 MHz?
beq _speed_50
nop
; The CAS field is encoded differently per speed: the value that means
; latency 2 is 0 in the 100 MHz branch and 1 in the 50 MHz branch.
_speed_100:
cmp.d 0x00, $r1 ; CAS latency = 2?
beq _bw_check
nop
or.d 0x20, $r2 ; CAS latency = 3
ba _bw_check
nop
_speed_50:
cmp.d 0x01, $r1 ; CAS latency = 2?
beq _bw_check
nop
or.d 0x20, $r2 ; CAS latency = 3
; Bit 23 set in the SDRAM config keeps the 32-bit value; otherwise the
; value is halved, which yields the 16-bit rows of the table above.
_bw_check:
move.d CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r1
and.d 0x800000, $r1 ; DRAM width is bit 23
bne _set_timing
nop
lsrq 1, $r2 ; 16 bits. Shift down value.
; Set timing parameters. Starts master clock
; r5 keeps a copy of the timing word taken before the ref-disable bits
; and mrs data are merged in; it is the value written last, once the
; command sequence is done. r1 is the base word used for every command.
_set_timing:
move.d CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
and.d 0x8000f9ff, $r1 ; Make sure mrs data and command is 0
or.d 0x80000000, $r1 ; Make sure sdram enable bit is set
move.d $r1, $r5
or.d 0x0000c000, $r1 ; ref = disable
lslq 16, $r2 ; mrs data starts at bit 16
or.d $r2, $r1
move.d $r1, [R_SDRAM_TIMING]
; Wait 200us
; Busy-wait countdown. The subq sits in the delay slot of the bne, so
; the branch tests the flags left by the preceding move.d or subq.
move.d 10000, $r2
1: bne 1b
subq 1, $r2
; Issue initialization command sequence
; r2 walks the byte table below and r3 marks its end. Each byte is
; shifted to the command field, merged with the base word in r1 and
; written to R_SDRAM_TIMING.
move.d _sdram_commands_start, $r2
move.d _sdram_commands_end, $r3
1: clear.d $r4
move.b [$r2+], $r4
lslq 9, $r4 ; Command starts at bit 9
or.d $r1, $r4
move.d $r4, [R_SDRAM_TIMING]
nop ; Wait five nop cycles between each command
nop
nop
nop
nop
cmp.d $r2, $r3
bne 1b
nop
; Sequence done: write the timing word saved in r5, then branch over
; the command bytes, which are embedded in the instruction stream.
move.d $r5, [R_SDRAM_TIMING]
ba _sdram_commands_end
nop
; Command table, one command code per byte, consumed in order by the
; loop above.
_sdram_commands_start:
.byte 3 ; Precharge
.byte 0 ; nop
.byte 2 ; refresh
.byte 0 ; nop
.byte 2 ; refresh
.byte 0 ; nop
.byte 2 ; refresh
.byte 0 ; nop
.byte 2 ; refresh
.byte 0 ; nop
.byte 2 ; refresh
.byte 0 ; nop
.byte 2 ; refresh
.byte 0 ; nop
.byte 2 ; refresh
.byte 0 ; nop
.byte 2 ; refresh
.byte 0 ; nop
.byte 1 ; mrs
.byte 0 ; nop
_sdram_commands_end:
#endif
#endif
--- NEW FILE ---
/*
* $Id: hw_settings.S,v 1.1 2002/04/09 17:03:16 atp Exp $
*
* This table is used by some tools to extract hardware parameters.
* The table should be included in the kernel and the decompressor.
* Don't forget to update the tools if you change this table.
*
* Copyright (C) 2001 Axis Communications AB
*
* Authors: Mikael Starvik (st...@ax...)
*/
; PA_SET_VALUE and PB_SET_VALUE pack the configured port defaults into
; the single words emitted next to R_PORT_PA_SET and R_PORT_PB_SET in
; the table below: direction shifted left by 8 above the data bits,
; and for port PB also the config value shifted left by 16.
#define PA_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PA_DIR << 8) | \
(CONFIG_ETRAX_DEF_R_PORT_PA_DATA))
#define PB_SET_VALUE ((CONFIG_ETRAX_DEF_R_PORT_PB_CONFIG << 16) | \
(CONFIG_ETRAX_DEF_R_PORT_PB_DIR << 8) | \
(CONFIG_ETRAX_DEF_R_PORT_PB_DATA))
; Table layout, in emission order:
;   magic string (no terminator is emitted after it)
;   dword: kernel start address
;   dword: debug port number, 0 - 3, or 4 when none is configured
;   dword: 1 for SDRAM, 0 otherwise
;   pairs of dwords: register symbol, then the value for it
;   dword: 0, ending the list of pairs
; The order and sizes form the interface to the external tools, so any
; change here must be mirrored in them.
.ascii "HW_PARAM_MAGIC" ; Magic number
.dword 0xc0004000 ; Kernel start address
; Debug port
#ifdef CONFIG_ETRAX_DEBUG_PORT0
.dword 0
#elif defined(CONFIG_ETRAX_DEBUG_PORT1)
.dword 1
#elif defined(CONFIG_ETRAX_DEBUG_PORT2)
.dword 2
#elif defined(CONFIG_ETRAX_DEBUG_PORT3)
.dword 3
#else
.dword 4 ; No debug
#endif
; SDRAM or EDO DRAM?
#ifdef CONFIG_ETRAX_SDRAM
.dword 1
#else
.dword 0
#endif
; Register values
; Each entry is two dwords: the register symbol followed by its value.
.dword R_WAITSTATES
.dword CONFIG_ETRAX_DEF_R_WAITSTATES
.dword R_BUS_CONFIG
.dword CONFIG_ETRAX_DEF_R_BUS_CONFIG
; Memory controller registers; which pair of entries is emitted
; depends on the memory type selected above.
#ifdef CONFIG_ETRAX_SDRAM
.dword R_SDRAM_CONFIG
.dword CONFIG_ETRAX_DEF_R_SDRAM_CONFIG
.dword R_SDRAM_TIMING
.dword CONFIG_ETRAX_DEF_R_SDRAM_TIMING
#else
.dword R_DRAM_CONFIG
.dword CONFIG_ETRAX_DEF_R_DRAM_CONFIG
.dword R_DRAM_TIMING
.dword CONFIG_ETRAX_DEF_R_DRAM_TIMING
#endif
; General purpose ports, using the packed values defined at the top.
.dword R_PORT_PA_SET
.dword PA_SET_VALUE
.dword R_PORT_PB_SET
.dword PB_SET_VALUE
.dword 0 ; No more register values
Index: Makefile
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/Makefile,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- Makefile 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ Makefile 9 Apr 2002 17:03:16 -0000 1.2
@@ -6,6 +6,6 @@
$(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $< -o $*.o
L_TARGET = lib.a
-obj-y = checksum.o checksumcopy.o string.o usercopy.o memset.o
+obj-y = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o
include $(TOPDIR)/Rules.make
Index: checksum.S
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/checksum.S,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- checksum.S 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ checksum.S 9 Apr 2002 17:03:16 -0000 1.2
@@ -1,113 +1,124 @@
- ;; $Id$
- ;; A fast checksum routine using movem
- ;; Copyright (c) 1998 Bjorn Wesen/Axis Communications AB
+/* $Id$
+ * A fast checksum routine using movem
+ * Copyright (c) 1998-2001 Axis Communications AB
+ *
+ * csum_partial(const unsigned char * buff, int len, unsigned int sum)
+ */
- ;; csum_partial(const unsigned char * buff, int len, unsigned int sum)
-
- .globl _csum_partial
-_csum_partial:
+ .globl csum_partial
+csum_partial:
+ ;; r10 - src
+ ;; r11 - length
+ ;; r12 - checksum
+
;; check for breakeven length between movem and normal word looping versions
+ ;; we also do _NOT_ want to compute a checksum over more than the
+ ;; actual length when length < 40
- cmpu.w 80,r11
- bcs no_movem
+ cmpu.w 80,$r11
+ blo _word_loop
nop
;; need to save the registers we use below in the movem loop
;; this overhead is why we have a check above for breakeven length
+ ;; only r0 - r8 have to be saved, the other ones are clobber-able
+ ;; according to the ABI
- subq 9*4,sp
- movem r8,[sp]
+ subq 9*4,$sp
+ movem $r8,[$sp]
;; do a movem checksum
- ;; r10 - src
- ;; r11 - length
- ;; r12 - checksum
-
- subq 10*4,r11 ; update length for the first loop
+ subq 10*4,$r11 ; update length for the first loop
-mloop: movem [r10+],r9 ; read 10 longwords
+_mloop: movem [$r10+],$r9 ; read 10 longwords
;; perform dword checksumming on the 10 longwords
- add.d r0,r12
+ add.d $r0,$r12
ax
- add.d r1,r12
+ add.d $r1,$r12
ax
- add.d r2,r12
+ add.d $r2,$r12
ax
- add.d r3,r12
+ add.d $r3,$r12
ax
- add.d r4,r12
+ add.d $r4,$r12
ax
- add.d r5,r12
+ add.d $r5,$r12
ax
- add.d r6,r12
+ add.d $r6,$r12
ax
- add.d r7,r12
+ add.d $r7,$r12
ax
- add.d r8,r12
+ add.d $r8,$r12
ax
- add.d r9,r12
+ add.d $r9,$r12
;; fold the carry into the checksum, to avoid having to loop the carry
;; back into the top
ax
- addq 0,r12
+ addq 0,$r12
ax ; do it again, since we might have generated a carry
- addq 0,r12
+ addq 0,$r12
- subq 10*4,r11
- bge mloop
+ subq 10*4,$r11
+ bge _mloop
nop
- addq 10*4,r11 ; compensate for last loop underflowing length
+ addq 10*4,$r11 ; compensate for last loop underflowing length
- ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
-
- moveq -1,r1 ; put 0xffff in r1, faster than move.d 0xffff,r1
- lsrq 16,r1
-
- move.d r12,r0
- lsrq 16,r0 ; r0 = checksum >> 16
- and.d r1,r12 ; checksum = checksum & 0xffff
- add.d r0,r12 ; checksum += r0
- move.d r12,r0 ; do the same again, maybe we got a carry last add
- lsrq 16,r0
- and.d r1,r12
- add.d r0,r12
-
- movem [sp+],r8 ; restore regs
-
-no_movem:
- cmpq 2,r11
- blt no_words
+ movem [$sp+],$r8 ; restore regs
+
+_word_loop:
+ ;; only fold if there is anything to fold.
+
+ cmpq 0,$r12
+ beq _no_fold
+
+ ;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
+ ;; r9 and r13 can be used as temporaries.
+
+ moveq -1,$r9 ; put 0xffff in r9, faster than move.d 0xffff,r9
+ lsrq 16,$r9
+
+ move.d $r12,$r13
+ lsrq 16,$r13 ; r13 = checksum >> 16
+ and.d $r9,$r12 ; checksum = checksum & 0xffff
+ add.d $r13,$r12 ; checksum += r13
+ move.d $r12,$r13 ; do the same again, maybe we got a carry last add
+ lsrq 16,$r13
+ and.d $r9,$r12
+ add.d $r13,$r12
+
+_no_fold:
+ cmpq 2,$r11
+ blt _no_words
nop
;; checksum the rest of the words
- subq 2,r11
+ subq 2,$r11
-wloop: subq 2,r11
- bge wloop
- addu.w [r10+],r12
+_wloop: subq 2,$r11
+ bge _wloop
+ addu.w [$r10+],$r12
- addq 2,r11
+ addq 2,$r11
-no_words:
+_no_words:
;; see if we have one odd byte more
- cmpq 1,r11
- beq do_byte
+ cmpq 1,$r11
+ beq _do_byte
nop
ret
- move.d r12, r10
+ move.d $r12, $r10
-do_byte:
+_do_byte:
;; copy and checksum the last byte
- addu.b [r10],r12
+ addu.b [$r10],$r12
ret
- move.d r12, r10
+ move.d $r12, $r10
-
\ No newline at end of file
Index: checksumcopy.S
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/checksumcopy.S,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- checksumcopy.S 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ checksumcopy.S 9 Apr 2002 17:03:16 -0000 1.2
@@ -1,120 +1,132 @@
- ;; $Id$
- ;; A fast checksum+copy routine using movem
- ;; Copyright (c) 1998, 2000 Axis Communications AB
- ;;
- ;; Authors: Bjorn Wesen
- ;;
- ;; csum_partial_copy_nocheck(const char *src, char *dst,
- ;; int len, unsigned int sum)
+/* $Id$
+ * A fast checksum+copy routine using movem
+ * Copyright (c) 1998, 2001 Axis Communications AB
+ *
+ * Authors: Bjorn Wesen
+ *
+ * csum_partial_copy_nocheck(const char *src, char *dst,
+ * int len, unsigned int sum)
+ */
- .globl _csum_partial_copy_nocheck
-_csum_partial_copy_nocheck:
+ .globl csum_partial_copy_nocheck
+csum_partial_copy_nocheck:
+ ;; r10 - src
+ ;; r11 - dst
+ ;; r12 - length
+ ;; r13 - checksum
+
;; check for breakeven length between movem and normal word looping versions
+ ;; we also do _NOT_ want to compute a checksum over more than the
+ ;; actual length when length < 40
- cmpu.w 80,r12
- bcs no_movem
+ cmpu.w 80, $r12
+ blo _word_loop
nop
;; need to save the registers we use below in the movem loop
;; this overhead is why we have a check above for breakeven length
+ ;; only r0 - r8 have to be saved, the other ones are clobber-able
+ ;; according to the ABI
- subq 9*4,sp
- movem r8,[sp]
+ subq 9*4, $sp
+ movem $r8, [$sp]
;; do a movem copy and checksum
- ;; r10 - src
- ;; r11 - dst
- ;; r12 - length
- ;; r13 - checksum
-
- subq 10*4,r12 ; update length for the first loop
+ subq 10*4, $r12 ; update length for the first loop
-mloop: movem [r10+],r9 ; read 10 longwords
- movem r9,[r11+] ; write 10 longwords
+_mloop: movem [$r10+],$r9 ; read 10 longwords
+1: ;; A failing userspace access will have this as PC.
+ movem $r9,[$r11+] ; write 10 longwords
;; perform dword checksumming on the 10 longwords
- add.d r0,r13
+ add.d $r0,$r13
ax
- add.d r1,r13
+ add.d $r1,$r13
ax
- add.d r2,r13
+ add.d $r2,$r13
ax
- add.d r3,r13
+ add.d $r3,$r13
ax
- add.d r4,r13
+ add.d $r4,$r13
ax
- add.d r5,r13
+ add.d $r5,$r13
ax
- add.d r6,r13
+ add.d $r6,$r13
ax
- add.d r7,r13
+ add.d $r7,$r13
ax
- add.d r8,r13
+ add.d $r8,$r13
ax
- add.d r9,r13
+ add.d $r9,$r13
;; fold the carry into the checksum, to avoid having to loop the carry
;; back into the top
ax
- addq 0,r13
+ addq 0,$r13
+ ax ; do it again, since we might have generated a carry
+ addq 0,$r13
- subq 10*4,r12
- bge mloop
+ subq 10*4,$r12
+ bge _mloop
nop
- addq 10*4,r12 ; compensate for last loop underflowing length
+ addq 10*4,$r12 ; compensate for last loop underflowing length
+
+ movem [$sp+],$r8 ; restore regs
+
+_word_loop:
+ ;; only fold if there is anything to fold.
+
+ cmpq 0,$r13
+ beq _no_fold
;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
+ ;; r9 can be used as temporary.
- moveq -1,r1 ; put 0xffff in r1, faster than move.d 0xffff,r1
- lsrq 16,r1
-
- move.d r13,r0
- lsrq 16,r0 ; r0 = checksum >> 16
- and.d r1,r13 ; checksum = checksum & 0xffff
- add.d r0,r13 ; checksum += r0
- move.d r13,r0 ; do the same again, maybe we got a carry last add
- lsrq 16,r0
- and.d r1,r13
- add.d r0,r13
-
- movem [sp+],r8 ; restore regs
-
-no_movem:
- cmpq 2,r12
- blt no_words
+ move.d $r13,$r9
+ lsrq 16,$r9 ; r9 = checksum >> 16
+ and.d 0xffff,$r13 ; checksum = checksum & 0xffff
+ add.d $r9,$r13 ; checksum += r9
+ move.d $r13,$r9 ; do the same again, maybe we got a carry last add
+ lsrq 16,$r9
+ and.d 0xffff,$r13
+ add.d $r9,$r13
+
+_no_fold:
+ cmpq 2,$r12
+ blt _no_words
nop
;; copy and checksum the rest of the words
- subq 2,r12
+ subq 2,$r12
-wloop: move.w [r10+],r9
- addu.w r9,r13
- subq 2,r12
- bge wloop
- move.w r9,[r11+]
+_wloop: move.w [$r10+],$r9
+2: ;; A failing userspace access will have this as PC.
+ addu.w $r9,$r13
+ subq 2,$r12
+ bge _wloop
+ move.w $r9,[$r11+]
- addq 2,r12
+ addq 2,$r12
-no_words:
+_no_words:
;; see if we have one odd byte more
- cmpq 1,r12
- beq do_byte
+ cmpq 1,$r12
+ beq _do_byte
nop
ret
- move.d r13, r10
+ move.d $r13, $r10
-do_byte:
+_do_byte:
;; copy and checksum the last byte
- move.b [r10],r9
- addu.b r9,r13
- move.b r9,[r11]
+ move.b [$r10],$r9
+3: ;; A failing userspace access will have this as PC.
+ addu.b $r9,$r13
+ move.b $r9,[$r11]
ret
- move.d r13, r10
-
-
\ No newline at end of file
+ move.d $r13, $r10
Index: dmacopy.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/dmacopy.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
Index: memset.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/memset.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- memset.c 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ memset.c 9 Apr 2002 17:03:16 -0000 1.2
@@ -27,6 +27,8 @@
/*# */
/*#-------------------------------------------------------------------------*/
+#include <linux/types.h>
+
/* No, there's no macro saying 12*4, since it is "hard" to get it into
the asm in a good way. Thus better to expose the problem everywhere.
*/
@@ -39,7 +41,7 @@
void *memset(void *pdst,
int c,
- unsigned int plen)
+ size_t plen)
{
/* Ok. Now we want the parameters put in special registers.
Make sure the compiler is able to make something useful of this. */
@@ -54,7 +56,12 @@
/* Ugh. This is fragile at best. Check with newer GCC releases, if
they compile cascaded "x |= x << 8" sanely! */
- __asm__("movu.b %0,r13\n\tlslq 8,r13\n\tmove.b %0,r13\n\tmove.d r13,%0\n\tlslq 16,r13\n\tor.d r13,%0"
+ __asm__("movu.b %0,$r13\n\t"
+ "lslq 8,$r13\n\t"
+ "move.b %0,$r13\n\t"
+ "move.d $r13,%0\n\t"
+ "lslq 16,$r13\n\t"
+ "or.d $r13,%0"
: "=r" (lc) : "0" (lc) : "r13");
{
@@ -111,36 +118,36 @@
;; Save the registers we'll clobber in the movem process
;; on the stack. Don't mention them to gcc, it will only be
;; upset.
- subq 11*4,sp
- movem r10,[sp]
+ subq 11*4,$sp
+ movem $r10,[$sp]
- move.d r11,r0
- move.d r11,r1
- move.d r11,r2
- move.d r11,r3
- move.d r11,r4
- move.d r11,r5
- move.d r11,r6
- move.d r11,r7
- move.d r11,r8
- move.d r11,r9
- move.d r11,r10
+ move.d $r11,$r0
+ move.d $r11,$r1
+ move.d $r11,$r2
+ move.d $r11,$r3
+ move.d $r11,$r4
+ move.d $r11,$r5
+ move.d $r11,$r6
+ move.d $r11,$r7
+ move.d $r11,$r8
+ move.d $r11,$r9
+ move.d $r11,$r10
;; Now we've got this:
;; r13 - dst
;; r12 - n
;; Update n for the first loop
- subq 12*4,r12
+ subq 12*4,$r12
0:
- subq 12*4,r12
+ subq 12*4,$r12
bge 0b
- movem r11,[r13+]
+ movem $r11,[$r13+]
- addq 12*4,r12 ;; compensate for last loop underflowing n
+ addq 12*4,$r12 ;; compensate for last loop underflowing n
;; Restore registers from stack
- movem [sp+],r10"
+ movem [$sp+],$r10"
/* Outputs */ : "=r" (dst), "=r" (n)
/* Inputs */ : "0" (dst), "1" (n), "r" (lc));
Index: old_checksum.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/old_checksum.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
Index: string.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/string.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- string.c 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ string.c 9 Apr 2002 17:03:16 -0000 1.2
@@ -31,9 +31,11 @@
/*# */
/*#-------------------------------------------------------------------------*/
+#include <linux/types.h>
+
void *memcpy(void *pdst,
const void *psrc,
- unsigned int pn)
+ size_t pn)
{
/* Ok. Now we want the parameters put in special registers.
Make sure the compiler is able to make something useful of this.
@@ -100,8 +102,8 @@
;;
;; Save the registers we'll use in the movem process
;; on the stack.
- subq 11*4,sp
- movem r10,[sp]
+ subq 11*4,$sp
+ movem $r10,[$sp]
;; Now we've got this:
;; r11 - src
@@ -109,17 +111,17 @@
;; r12 - n
;; Update n for the first loop
- subq 44,r12
+ subq 44,$r12
0:
- movem [r11+],r10
- subq 44,r12
+ movem [$r11+],$r10
+ subq 44,$r12
bge 0b
- movem r10,[r13+]
+ movem $r10,[$r13+]
- addq 44,r12 ;; compensate for last loop underflowing n
+ addq 44,$r12 ;; compensate for last loop underflowing n
;; Restore registers from stack
- movem [sp+],r10"
+ movem [$sp+],$r10"
/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
/* Inputs */ : "0" (dst), "1" (src), "2" (n));
Index: usercopy.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/usercopy.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- usercopy.c 25 Feb 2001 23:15:23 -0000 1.1.1.1
+++ usercopy.c 9 Apr 2002 17:03:16 -0000 1.2
@@ -95,8 +95,8 @@
;;
;; Save the registers we'll use in the movem process
;; on the stack.
- subq 11*4,sp
- movem r10,[sp]
+ subq 11*4,$sp
+ movem $r10,[$sp]
;; Now we've got this:
;; r11 - src
@@ -104,7 +104,7 @@
;; r12 - n
;; Update n for the first loop
- subq 44,r12
+ subq 44,$r12
; Since the noted PC of a faulting instruction in a delay-slot of a taken
; branch, is that of the branch target, we actually point at the from-movem
@@ -113,15 +113,15 @@
; after *that* movem.
0:
- movem [r11+],r10
- subq 44,r12
+ movem [$r11+],$r10
+ subq 44,$r12
bge 0b
- movem r10,[r13+]
+ movem $r10,[$r13+]
1:
- addq 44,r12 ;; compensate for last loop underflowing n
+ addq 44,$r12 ;; compensate for last loop underflowing n
;; Restore registers from stack
- movem [sp+],r10
+ movem [$sp+],$r10
2:
.section .fixup,\"ax\"
@@ -130,14 +130,14 @@
; performance penalty for sany use; the program will segfault soon enough.
3:
- move.d [sp],r10
- addq 44,r10
- move.d r10,[sp]
+ move.d [$sp],$r10
+ addq 44,$r10
+ move.d $r10,[$sp]
jump 0b
4:
- movem [sp+],r10
- addq 44,r10
- addq 44,r12
+ movem [$sp+],$r10
+ addq 44,$r10
+ addq 44,$r12
jump 2b
.previous
@@ -255,8 +255,8 @@
;;
;; Save the registers we'll use in the movem process
;; on the stack.
- subq 11*4,sp
- movem r10,[sp]
+ subq 11*4,$sp
+ movem $r10,[$sp]
;; Now we've got this:
;; r11 - src
@@ -264,42 +264,82 @@
;; r12 - n
;; Update n for the first loop
- subq 44,r12
+ subq 44,$r12
0:
- movem [r11+],r10
+ movem [$r11+],$r10
1:
- subq 44,r12
+ subq 44,$r12
bge 0b
- movem r10,[r13+]
-
- addq 44,r12 ;; compensate for last loop underflowing n
+ movem $r10,[$r13+]
+ addq 44,$r12 ;; compensate for last loop underflowing n
+8:
;; Restore registers from stack
- movem [sp+],r10
+ movem [$sp+],$r10
.section .fixup,\"ax\"
-; To provide a correct count in r10 of bytes that failed to be copied,
-; we jump back into the loop if the loop-branch was taken.
-; There is no performance penalty; the program will segfault soon
-; enough.
+;; Do not jump back into the loop if we fail. For some uses, we get a
+;; page fault but for performance reasons we care to not get further
+;; faults. For example, fs/super.c at one time did
+;; i = size - copy_from_user((void *)page, data, size);
+;; which would cause repeated faults while clearing the remainder of
+;; the SIZE bytes at PAGE after the first fault.
3:
- move.d [sp],r10
- addq 44,r10
- move.d r10,[sp]
- clear.d r0
- clear.d r1
- clear.d r2
- clear.d r3
- clear.d r4
- clear.d r5
- clear.d r6
- clear.d r7
- clear.d r8
- clear.d r9
- clear.d r10
- jump 1b
+ move.d [$sp],$r10
+
+;; Number of remaining bytes, cleared but not copied, is r12 + 44.
+
+ add.d $r12,$r10
+ addq 44,$r10
+
+ move.d $r10,[$sp]
+ clear.d $r0
+ clear.d $r1
+ clear.d $r2
+ clear.d $r3
+ clear.d $r4
+ clear.d $r5
+ clear.d $r6
+ clear.d $r7
+ clear.d $r8
+ clear.d $r9
+ clear.d $r10
+
+;; Perform clear similar to the copy-loop.
+
+4:
+ subq 44,$r12
+ bge 4b
+ movem $r10,[$r13+]
+
+;; Clear by four for the remaining multiples.
+
+ addq 40,$r12
+ bmi 6f
+ nop
+5:
+ subq 4,$r12
+ bpl 5b
+ clear.d [$r13+]
+6:
+ addq 4,$r12
+ beq 7f
+ nop
+
+ subq 1,$r12
+ beq 7f
+ clear.b [$r13+]
+
+ subq 1,$r12
+ beq 7f
+ clear.b [$r13+]
+
+ clear.d $r12
+ clear.b [$r13+]
+7:
+ jump 8b
.previous
.section __ex_table,\"a\"
@@ -411,50 +451,50 @@
;; Save the registers we'll clobber in the movem process
;; on the stack. Don't mention them to gcc, it will only be
;; upset.
- subq 11*4,sp
- movem r10,[sp]
+ subq 11*4,$sp
+ movem $r10,[$sp]
- clear.d r0
- clear.d r1
- clear.d r2
- clear.d r3
- clear.d r4
- clear.d r5
- clear.d r6
- clear.d r7
- clear.d r8
- clear.d r9
- clear.d r10
- clear.d r11
+ clear.d $r0
+ clear.d $r1
+ clear.d $r2
+ clear.d $r3
+ clear.d $r4
+ clear.d $r5
+ clear.d $r6
+ clear.d $r7
+ clear.d $r8
+ clear.d $r9
+ clear.d $r10
+ clear.d $r11
;; Now we've got this:
;; r13 - dst
;; r12 - n
;; Update n for the first loop
- subq 12*4,r12
+ subq 12*4,$r12
0:
- subq 12*4,r12
+ subq 12*4,$r12
bge 0b
- movem r11,[r13+]
+ movem $r11,[$r13+]
1:
- addq 12*4,r12 ;; compensate for last loop underflowing n
+ addq 12*4,$r12 ;; compensate for last loop underflowing n
;; Restore registers from stack
- movem [sp+],r10
+ movem [$sp+],$r10
2:
.section .fixup,\"ax\"
3:
- move.d [sp],r10
- addq 12*4,r10
- move.d r10,[sp]
- clear.d r10
+ move.d [$sp],$r10
+ addq 12*4,$r10
+ move.d $r10,[$sp]
+ clear.d $r10
jump 0b
4:
- movem [sp+],r10
- addq 12*4,r10
- addq 12*4,r12
+ movem [$sp+],$r10
+ addq 12*4,$r10
+ addq 12*4,$r12
jump 2b
.previous
|