[LV-kern-commit] CVS: kernel-2.4/arch/cris/lib csumcpfruser.S,NONE,1.1 dram_init.S,NONE,1.1 hw_setti

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/linux-vax/kernel-2.4/arch/cris/lib
In directory usw-pr-cvs1:/tmp/cvs-serv13825/cris/lib

Modified Files:
	Makefile checksum.S checksumcopy.S dmacopy.c memset.c 
	old_checksum.c string.c usercopy.c 
Added Files:
	csumcpfruser.S dram_init.S hw_settings.S 
Log Message:
synch 2.4.15 commit 29

--- NEW FILE ---
/*
 * Add-on to transform csum_partial_copy_nocheck in checksumcopy.S into
 * csum_partial_copy_from_user by adding exception records.
 *
 * Copyright (C) 2001 Axis Communications AB.
 *
 * Author: Hans-Peter Nilsson.
 */

#include <asm/errno.h>

/* Same function body, but a different name.  If we just added exception
   records to _csum_partial_copy_nocheck and made it generic, we wouldn't
   know a user fault from a kernel fault and we would have overhead in
   each kernel caller for the error-pointer argument.

   unsigned int csum_partial_copy_from_user
     (const char *src, char *dst, int len, unsigned int sum, int *errptr);

   Note that the errptr argument is only set if we encounter an error.
   It is conveniently located on the stack, so the normal function body
   does not have to handle it.  */

#define csum_partial_copy_nocheck csum_partial_copy_from_user

/* There are local labels numbered 1, 2 and 3 present to mark the
   different from-user accesses.  */
#include "checksumcopy.S"

	.section .fixup,"ax"

;; Exception fixup code for csum_partial_copy_from_user.
;;
;; The exception table at the end of this file pairs each of the local
;; labels 1, 2 and 3 (placed in checksumcopy.S right after a load through
;; the source pointer) with one of the entry points 4, 5 and 6 below.
;; The entry points fall through into each other: each one only undoes
;; the length bookkeeping of its own loop and then reaches the common
;; tail at 6.
;;
;; Register roles on entry, as documented in checksumcopy.S:
;;   r11 - dst, r12 - length counter, r13 - checksum so far.
;; r9 is used as scratch here.

;; Here from the movem loop; restore stack.
4:
	movem	[$sp+],$r8
;; r12 is already decremented.  Add back chunk_size-2.
;; (The remaining 2 is added by falling through into 5 below.)
	addq	40-2,$r12

;; Here from the word loop; r12 is off by 2; add it back.
5:
	addq	2,$r12

;; Here from a failing single byte.
6:

;; Signal in *errptr that we had a failing access.
;; The word at the top of the stack is used as the address to store to
;; (double indirection); the header comment of this file states that
;; errptr is located on the stack.
	moveq	-EFAULT,$r9
	move.d	$r9,[[$sp]]

;; Clear the rest of the destination area using memset.  Preserve the
;; checksum for the readable bytes.
;; The return address (srp) and the checksum (r13) are saved on the stack
;; across the call.  Arguments for memset: r10 = dst, r11 = 0 (the fill
;; value), r12 = remaining length as corrected above.
	push	$srp
	push	$r13
	move.d	$r11,$r10
	clear.d	$r11
	jsr	memset
;; The saved checksum is popped into r10, the register the normal function
;; body also leaves its result in, and the saved srp is popped and jumped
;; to, which returns to the caller.
	pop	$r10
	jump	[$sp+]

	.previous
;; Exception table: each entry is a pair (label marking a from-user
;; access, fixup entry point to continue at if that access fails).
	.section __ex_table,"a"
	.dword 1b,4b
	.dword 2b,5b
	.dword 3b,6b
	.previous

--- NEW FILE ---
/* $Id: dram_init.S,v 1.1 2002/04/09 17:03:16 atp Exp $
 * 
 * DRAM/SDRAM initialization - alter with care
 * This file is intended to be included from other assembler files
 *
 * Note: This file must not modify r8 or r9, because they are used to
 *       carry information from the decompressor to the kernel
 *
 * Copyright (C) 2000, 2001 Axis Communications AB
 *
 * Authors:  Mikael Starvik (st...@ax...)	
 * 
 * $Log: dram_init.S,v $
 * Revision 1.1  2002/04/09 17:03:16  atp
 * synch 2.4.15 commit 29
 *
 * Revision 1.10  2001/10/04 12:00:21  martinnn
 * Added missing underscores.
 *
 * Revision 1.9  2001/10/01 14:47:35  bjornw
 * Added register prefixes and removed underscores
 *
 * Revision 1.8  2001/05/15 07:12:45  hp
 * Copy warning from head.S about r8 and r9
 *
 * Revision 1.7  2001/04/18 12:05:39  bjornw
 * Fixed comments, and explicitely include config.h to be sure its there
 *
 * Revision 1.6  2001/04/10 06:20:16  starvik
 * Delay should be 200us, not 200ns
 *
 * Revision 1.5  2001/04/09 06:01:13  starvik
 * Added support for 100 MHz SDRAMs
 *
 * Revision 1.4  2001/03/26 14:24:01  bjornw
 * Namechange of some config options
 *
 * Revision 1.3  2001/03/23 08:29:41  starvik
 * Corrected calculation of mrs_data
 *
 * Revision 1.2  2001/02/08 15:20:00  starvik
 * Corrected SDRAM initialization
 * Should now be included as inline
 *
 * Revision 1.1  2001/01/29 13:08:02  starvik
 * Initial version
 * This file should be included from all assembler files that needs to
 * initialize DRAM/SDRAM.
 *
 */

/* Just to be certain the config file is included, we include it here
 * explicitly instead of depending on it being included in the file that
 * uses this code.
 */

#include <linux/config.h>

	;; WARNING! The registers r8 and r9 are used as parameters carrying
	;; information from the decompressor (if the kernel was compressed). 
	;; They should not be used in the code below.
	;;
	;; Overview: program the wait-state and bus configuration registers,
	;; then either the plain DRAM config/timing registers or, when the
	;; SDRAM config option is set, run the SDRAM initialization sequence.
	;; No code is emitted when building for the simulator.
	;; Registers written: r0, and in the SDRAM case also r1-r5.

#ifndef CONFIG_SVINTO_SIM	
	; Each register is loaded via r0 with its configured default value.
	move.d   CONFIG_ETRAX_DEF_R_WAITSTATES, $r0
	move.d   $r0, [R_WAITSTATES]

	move.d   CONFIG_ETRAX_DEF_R_BUS_CONFIG, $r0
	move.d   $r0, [R_BUS_CONFIG]
	
#ifndef CONFIG_ETRAX_SDRAM
	; Non-SDRAM case: two plain register writes, nothing more.
	move.d   CONFIG_ETRAX_DEF_R_DRAM_CONFIG, $r0
	move.d   $r0, [R_DRAM_CONFIG]

	move.d   CONFIG_ETRAX_DEF_R_DRAM_TIMING, $r0
	move.d   $r0, [R_DRAM_TIMING]
#else	
	; Refer to ETRAX 100LX Designers Reference for a description of SDRAM initialization
	
	; Bank configuration
	move.d   CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r0
	move.d   $r0, [R_SDRAM_CONFIG]

	; Calculate value of mrs_data 
	; CAS latency = 2 && bus_width = 32 => 0x40
	; CAS latency = 3 && bus_width = 32 => 0x60
	; CAS latency = 2 && bus_width = 16 => 0x20
	; CAS latency = 3 && bus_width = 16 => 0x30
	;
	; r2 accumulates mrs_data.  r1 and r3 both start as the configured
	; timing value; r1 is masked down to the CAS latency field and r3 to
	; the speed bit.
	
	move.d   0x40, $r2       ; Assume 32 bits and CAS latency = 2
	move.d   CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
	move.d   $r1, $r3
 	and.d    0x03, $r1       ; Get CAS latency
	and.d    0x1000, $r3     ; 50 or 100 MHz?
	beq      _speed_50	; bit 12 clear: take the 50 MHz path
	nop
	; NOTE(review): the CAS field value meaning latency 2 is 0 in the
	; 100 MHz path but 1 in the 50 MHz path; confirm this encoding
	; against the ETRAX 100LX reference.
_speed_100:		
	cmp.d    0x00, $r1	; CAS latency = 2?
	beq      _bw_check
	nop
	or.d     0x20, $r2	; CAS latency = 3 
	ba       _bw_check
	nop
_speed_50:			
	cmp.d    0x01, $r1	; CAS latency = 2?
	beq      _bw_check
	nop
	or.d     0x20, $r2       ; CAS latency = 3
_bw_check:
	; Bit 23 set keeps the 32-bit value; otherwise halve it for 16 bits,
	; giving the 0x20/0x30 values listed in the table above.
	move.d   CONFIG_ETRAX_DEF_R_SDRAM_CONFIG, $r1
	and.d    0x800000, $r1	; DRAM width is bit 23
	bne      _set_timing
	nop
	lsrq     1, $r2		;  16 bits. Shift down value.

	; Set timing parameters. Starts master clock
	;
	; Two register images are built from the configured timing value:
	;   r5 - the final value, written once the command sequence is done
	;   r1 - the same value with refresh disabled and mrs_data inserted;
	;        it is the base that each command below is OR-ed into
_set_timing:
	move.d   CONFIG_ETRAX_DEF_R_SDRAM_TIMING, $r1
	and.d    0x8000f9ff, $r1 ; Make sure mrs data and command is 0 
	or.d     0x80000000, $r1	; Make sure sdram enable bit is set
	move.d   $r1, $r5
	or.d     0x0000c000, $r1 ; ref = disable
	lslq     16, $r2		; mrs data starts at bit 16
	or.d     $r2, $r1 
	move.d   $r1, [R_SDRAM_TIMING]	
		
	; Wait 200us
	; Busy loop counting r2 down from 10000.  The subq sits in the delay
	; slot of the branch, so it runs on every pass, and the branch tests
	; the flags left by the previous instruction (the move.d on the first
	; pass, the subq afterwards).
	move.d   10000, $r2
1:	bne      1b
	subq     1, $r2
	
	; Issue initialization command sequence
	; r2 walks the byte table below and r3 marks its end.  Each command
	; byte is zero-extended into r4, shifted to the command field, merged
	; with the base value in r1 and written to the timing register.
	move.d   _sdram_commands_start, $r2
	move.d   _sdram_commands_end,  $r3
1:	clear.d  $r4
	move.b   [$r2+], $r4
	lslq     9, $r4	; Command starts at bit 9
	or.d     $r1, $r4
	move.d   $r4, [R_SDRAM_TIMING]
	nop		; Wait five nop cycles between each command
	nop
	nop
	nop
	nop
	cmp.d    $r2, $r3
	bne      1b
	nop
	; All commands issued: write the final timing value saved in r5, then
	; branch past the command table, which is data and must not be run.
	move.d   $r5, [R_SDRAM_TIMING]
	ba       _sdram_commands_end
	nop

	; Command table: one precharge, eight refreshes and one mode register
	; set (mrs), each followed by a nop command.
_sdram_commands_start:
	.byte   3	; Precharge
	.byte   0       ; nop
	.byte   2	; refresh
	.byte   0	; nop
	.byte   2	; refresh
	.byte   0	; nop
	.byte   2	; refresh
	.byte   0	; nop
	.byte   2	; refresh
	.byte   0	; nop
	.byte   2	; refresh
	.byte   0	; nop
	.byte   2	; refresh
	.byte   0	; nop
	.byte   2	; refresh
	.byte   0	; nop
	.byte   2	; refresh
	.byte   0	; nop
	.byte   1	; mrs
	.byte   0	; nop 
_sdram_commands_end:		
#endif
#endif

--- NEW FILE ---
/*
 * $Id: hw_settings.S,v 1.1 2002/04/09 17:03:16 atp Exp $
 * 
 * This table is used by some tools to extract hardware parameters.
 * The table should be included in the kernel and the decompressor.
 * Don't forget to update the tools if you change this table.
 *
 * Copyright (C) 2001 Axis Communications AB
 *
 * Authors:  Mikael Starvik (st...@ax...)	
 */

/* Values for the port A and port B set registers: the data value OR-ed
 * with the direction value shifted up 8 bits and, for port B only, the
 * configuration value shifted up 16 bits.
 */
#define PA_SET_VALUE \
	((CONFIG_ETRAX_DEF_R_PORT_PA_DIR << 8) | \
	 (CONFIG_ETRAX_DEF_R_PORT_PA_DATA))
#define PB_SET_VALUE \
	((CONFIG_ETRAX_DEF_R_PORT_PB_CONFIG << 16) | \
	 (CONFIG_ETRAX_DEF_R_PORT_PB_DIR << 8) | \
	 (CONFIG_ETRAX_DEF_R_PORT_PB_DATA))

/* Debug port number stored in the table; 4 means no debug port. */
#if defined(CONFIG_ETRAX_DEBUG_PORT0)
#define HWS_DEBUG_PORT		0
#elif defined(CONFIG_ETRAX_DEBUG_PORT1)
#define HWS_DEBUG_PORT		1
#elif defined(CONFIG_ETRAX_DEBUG_PORT2)
#define HWS_DEBUG_PORT		2
#elif defined(CONFIG_ETRAX_DEBUG_PORT3)
#define HWS_DEBUG_PORT		3
#else
#define HWS_DEBUG_PORT		4
#endif

/* Memory type flag (1 = SDRAM, 0 = EDO DRAM) and the matching pair of
 * memory controller registers with their configured values.
 */
#ifdef CONFIG_ETRAX_SDRAM
#define HWS_DRAM_IS_SDRAM	1
#define HWS_DRAM_CONFIG_REG	R_SDRAM_CONFIG
#define HWS_DRAM_CONFIG_VAL	CONFIG_ETRAX_DEF_R_SDRAM_CONFIG
#define HWS_DRAM_TIMING_REG	R_SDRAM_TIMING
#define HWS_DRAM_TIMING_VAL	CONFIG_ETRAX_DEF_R_SDRAM_TIMING
#else
#define HWS_DRAM_IS_SDRAM	0
#define HWS_DRAM_CONFIG_REG	R_DRAM_CONFIG
#define HWS_DRAM_CONFIG_VAL	CONFIG_ETRAX_DEF_R_DRAM_CONFIG
#define HWS_DRAM_TIMING_REG	R_DRAM_TIMING
#define HWS_DRAM_TIMING_VAL	CONFIG_ETRAX_DEF_R_DRAM_TIMING
#endif

	; Table header
	.ascii "HW_PARAM_MAGIC"		; Magic number
	.dword 0xc0004000		; Kernel start address
	.dword HWS_DEBUG_PORT		; Debug port
	.dword HWS_DRAM_IS_SDRAM	; SDRAM or EDO DRAM?

	; Register values, stored as (register, value) pairs
	.dword R_WAITSTATES
	.dword CONFIG_ETRAX_DEF_R_WAITSTATES

	.dword R_BUS_CONFIG
	.dword CONFIG_ETRAX_DEF_R_BUS_CONFIG

	.dword HWS_DRAM_CONFIG_REG
	.dword HWS_DRAM_CONFIG_VAL

	.dword HWS_DRAM_TIMING_REG
	.dword HWS_DRAM_TIMING_VAL

	.dword R_PORT_PA_SET
	.dword PA_SET_VALUE

	.dword R_PORT_PB_SET
	.dword PB_SET_VALUE

	.dword 0			; No more register values

Index: Makefile
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/Makefile,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- Makefile	25 Feb 2001 23:15:23 -0000	1.1.1.1
+++ Makefile	9 Apr 2002 17:03:16 -0000	1.2
@@ -6,6 +6,6 @@
 	$(CC) -D__ASSEMBLY__ $(AFLAGS) -traditional -c $< -o $*.o
 
 L_TARGET = lib.a
-obj-y  = checksum.o checksumcopy.o string.o usercopy.o memset.o
+obj-y  = checksum.o checksumcopy.o string.o usercopy.o memset.o csumcpfruser.o
 
 include $(TOPDIR)/Rules.make

Index: checksum.S
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/checksum.S,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- checksum.S	25 Feb 2001 23:15:23 -0000	1.1.1.1
+++ checksum.S	9 Apr 2002 17:03:16 -0000	1.2
@@ -1,113 +1,124 @@
-	;; $Id$
-	;; A fast checksum routine using movem
-	;; Copyright (c) 1998 Bjorn Wesen/Axis Communications AB
+/* $Id$
+ * A fast checksum routine using movem
+ * Copyright (c) 1998-2001 Axis Communications AB
+ *
+ * csum_partial(const unsigned char * buff, int len, unsigned int sum)
+ */
 
-	;; csum_partial(const unsigned char * buff, int len, unsigned int sum)
-	
-	.globl	_csum_partial
-_csum_partial:
+	.globl	csum_partial
+csum_partial:
 	
+	;; r10 - src
+	;; r11 - length
+	;; r12 - checksum
+
 	;; check for breakeven length between movem and normal word looping versions
+	;; we also do _NOT_ want to compute a checksum over more than the 
+	;; actual length when length < 40
 	
-	cmpu.w	80,r11
-	bcs	no_movem
+	cmpu.w	80,$r11
+	blo	_word_loop
 	nop
 
 	;; need to save the registers we use below in the movem loop
 	;; this overhead is why we have a check above for breakeven length
+	;; only r0 - r8 have to be saved, the other ones are clobber-able
+	;; according to the ABI
 	
-	subq	9*4,sp
-	movem	r8,[sp]
+	subq	9*4,$sp
+	movem	$r8,[$sp]
 	
 	;; do a movem checksum
 
-	;; r10 - src
-	;; r11 - length
-	;; r12 - checksum
-
-	subq	10*4,r11	; update length for the first loop
+	subq	10*4,$r11	; update length for the first loop
 	
-mloop:	movem	[r10+],r9	; read 10 longwords
+_mloop:	movem	[$r10+],$r9	; read 10 longwords
 
 	;; perform dword checksumming on the 10 longwords
 	
-	add.d	r0,r12
+	add.d	$r0,$r12
 	ax
-	add.d	r1,r12
+	add.d	$r1,$r12
 	ax
-	add.d	r2,r12
+	add.d	$r2,$r12
 	ax
-	add.d	r3,r12
+	add.d	$r3,$r12
 	ax
-	add.d	r4,r12
+	add.d	$r4,$r12
 	ax
-	add.d	r5,r12
+	add.d	$r5,$r12
 	ax
-	add.d	r6,r12
+	add.d	$r6,$r12
 	ax
-	add.d	r7,r12
+	add.d	$r7,$r12
 	ax
-	add.d	r8,r12
+	add.d	$r8,$r12
 	ax
-	add.d	r9,r12
+	add.d	$r9,$r12
 
 	;; fold the carry into the checksum, to avoid having to loop the carry
 	;; back into the top
 	
 	ax
-	addq	0,r12
+	addq	0,$r12
 	ax			; do it again, since we might have generated a carry
-	addq	0,r12
+	addq	0,$r12
 
-	subq	10*4,r11
-	bge	mloop
+	subq	10*4,$r11
+	bge	_mloop
 	nop
 
-	addq	10*4,r11	; compensate for last loop underflowing length
+	addq	10*4,$r11	; compensate for last loop underflowing length
 
-	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
-	
-	moveq	-1,r1		; put 0xffff in r1, faster than move.d 0xffff,r1
-	lsrq	16,r1
-	
-	move.d	r12,r0
-	lsrq	16,r0		; r0 = checksum >> 16
-	and.d	r1,r12		; checksum = checksum & 0xffff
-	add.d	r0,r12		; checksum += r0
-	move.d	r12,r0		; do the same again, maybe we got a carry last add
-	lsrq	16,r0
-	and.d	r1,r12
-	add.d	r0,r12
-	
-	movem	[sp+],r8	; restore regs
-
-no_movem:
-	cmpq	2,r11
-	blt	no_words
+	movem	[$sp+],$r8	; restore regs
+
+_word_loop:
+	;; only fold if there is anything to fold.
+
+	cmpq	0,$r12
+	beq	_no_fold
+
+	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
+	;; r9 and r13 can be used as temporaries.
+	
+	moveq	-1,$r9		; put 0xffff in r9, faster than move.d 0xffff,r9
+	lsrq	16,$r9
+	
+	move.d	$r12,$r13
+	lsrq	16,$r13		; r13 = checksum >> 16
+	and.d	$r9,$r12		; checksum = checksum & 0xffff
+	add.d	$r13,$r12		; checksum += r13
+	move.d	$r12,$r13		; do the same again, maybe we got a carry last add
+	lsrq	16,$r13
+	and.d	$r9,$r12
+	add.d	$r13,$r12
+
+_no_fold:
+	cmpq	2,$r11
+	blt	_no_words
 	nop
 	
 	;; checksum the rest of the words
 	
-	subq	2,r11
+	subq	2,$r11
 	
-wloop:	subq	2,r11
-	bge	wloop
-	addu.w	[r10+],r12
+_wloop:	subq	2,$r11
+	bge	_wloop
+	addu.w	[$r10+],$r12
 	
-	addq	2,r11
+	addq	2,$r11
 		
-no_words:
+_no_words:
 	;; see if we have one odd byte more
-	cmpq	1,r11
-	beq	do_byte
+	cmpq	1,$r11
+	beq	_do_byte
 	nop
 	ret
-	move.d	r12, r10
+	move.d	$r12, $r10
 
-do_byte:	
+_do_byte:	
 	;; copy and checksum the last byte
-	addu.b	[r10],r12
+	addu.b	[$r10],$r12
 	ret
-	move.d	r12, r10
+	move.d	$r12, $r10
 		
-	
\ No newline at end of file

Index: checksumcopy.S
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/checksumcopy.S,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- checksumcopy.S	25 Feb 2001 23:15:23 -0000	1.1.1.1
+++ checksumcopy.S	9 Apr 2002 17:03:16 -0000	1.2
@@ -1,120 +1,132 @@
-	;; $Id$
-	;; A fast checksum+copy routine using movem
-	;; Copyright (c) 1998, 2000 Axis Communications AB
-	;;
-	;; Authors:	Bjorn Wesen
-	;; 
-	;; csum_partial_copy_nocheck(const char *src, char *dst,
-	;;		             int len, unsigned int sum)
+/* $Id$
+ * A fast checksum+copy routine using movem
+ * Copyright (c) 1998, 2001 Axis Communications AB
+ *
+ * Authors:	Bjorn Wesen
+ * 
+ * csum_partial_copy_nocheck(const char *src, char *dst,
+ *		             int len, unsigned int sum)
+ */
 
-	.globl	_csum_partial_copy_nocheck
-_csum_partial_copy_nocheck:	
+	.globl	csum_partial_copy_nocheck
+csum_partial_copy_nocheck:	
 	
+	;; r10 - src
+	;; r11 - dst
+	;; r12 - length
+	;; r13 - checksum
+
 	;; check for breakeven length between movem and normal word looping versions
+	;; we also do _NOT_ want to compute a checksum over more than the 
+	;; actual length when length < 40
 	
-	cmpu.w	80,r12
-	bcs	no_movem
+	cmpu.w	80, $r12
+	blo	_word_loop
 	nop
 
 	;; need to save the registers we use below in the movem loop
 	;; this overhead is why we have a check above for breakeven length
+	;; only r0 - r8 have to be saved, the other ones are clobber-able
+	;; according to the ABI
 	
-	subq	9*4,sp
-	movem	r8,[sp]
+	subq	9*4, $sp
+	movem	$r8, [$sp]
 	
 	;; do a movem copy and checksum
 
-	;; r10 - src
-	;; r11 - dst
-	;; r12 - length
-	;; r13 - checksum
-
-	subq	10*4,r12	; update length for the first loop
+	subq	10*4, $r12	; update length for the first loop
 	
-mloop:	movem	[r10+],r9	; read 10 longwords
-	movem	r9,[r11+]	; write 10 longwords
+_mloop:	movem	[$r10+],$r9	; read 10 longwords
+1:	;; A failing userspace access will have this as PC.
+	movem	$r9,[$r11+]	; write 10 longwords
 
 	;; perform dword checksumming on the 10 longwords
 	
-	add.d	r0,r13
+	add.d	$r0,$r13
 	ax
-	add.d	r1,r13
+	add.d	$r1,$r13
 	ax
-	add.d	r2,r13
+	add.d	$r2,$r13
 	ax
-	add.d	r3,r13
+	add.d	$r3,$r13
 	ax
-	add.d	r4,r13
+	add.d	$r4,$r13
 	ax
-	add.d	r5,r13
+	add.d	$r5,$r13
 	ax
-	add.d	r6,r13
+	add.d	$r6,$r13
 	ax
-	add.d	r7,r13
+	add.d	$r7,$r13
 	ax
-	add.d	r8,r13
+	add.d	$r8,$r13
 	ax
-	add.d	r9,r13
+	add.d	$r9,$r13
 
 	;; fold the carry into the checksum, to avoid having to loop the carry
 	;; back into the top
 	
 	ax
-	addq	0,r13
+	addq	0,$r13
+	ax			; do it again, since we might have generated a carry
+	addq	0,$r13
 
-	subq	10*4,r12
-	bge	mloop
+	subq	10*4,$r12
+	bge	_mloop
 	nop
 
-	addq	10*4,r12	; compensate for last loop underflowing length
+	addq	10*4,$r12	; compensate for last loop underflowing length
+
+	movem	[$sp+],$r8	; restore regs
+
+_word_loop:
+	;; only fold if there is anything to fold.
+
+	cmpq	0,$r13
+	beq	_no_fold
 
 	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below
+	;; r9 can be used as temporary.
 	
-	moveq	-1,r1		; put 0xffff in r1, faster than move.d 0xffff,r1
-	lsrq	16,r1
-	
-	move.d	r13,r0
-	lsrq	16,r0		; r0 = checksum >> 16
-	and.d	r1,r13		; checksum = checksum & 0xffff
-	add.d	r0,r13		; checksum += r0
-	move.d	r13,r0		; do the same again, maybe we got a carry last add
-	lsrq	16,r0
-	and.d	r1,r13
-	add.d	r0,r13
-	
-	movem	[sp+],r8	; restore regs
-		
-no_movem:
-	cmpq	2,r12
-	blt	no_words
+	move.d	$r13,$r9
+	lsrq	16,$r9		; r0 = checksum >> 16
+	and.d	0xffff,$r13	; checksum = checksum & 0xffff
+	add.d	$r9,$r13	; checksum += r0
+	move.d	$r13,$r9	; do the same again, maybe we got a carry last add
+	lsrq	16,$r9
+	and.d	0xffff,$r13
+	add.d	$r9,$r13
+	
+_no_fold:
+	cmpq	2,$r12
+	blt	_no_words
 	nop
 	
 	;; copy and checksum the rest of the words
 	
-	subq	2,r12
+	subq	2,$r12
 	
-wloop:	move.w	[r10+],r9
-	addu.w	r9,r13
-	subq	2,r12
-	bge	wloop
-	move.w	r9,[r11+]
+_wloop:	move.w	[$r10+],$r9
+2:	;; A failing userspace access will have this as PC.
+	addu.w	$r9,$r13
+	subq	2,$r12
+	bge	_wloop
+	move.w	$r9,[$r11+]
 	
-	addq	2,r12
+	addq	2,$r12
 		
-no_words:
+_no_words:
 	;; see if we have one odd byte more
-	cmpq	1,r12
-	beq	do_byte
+	cmpq	1,$r12
+	beq	_do_byte
 	nop
 	ret
-	move.d	r13, r10
+	move.d	$r13, $r10
 
-do_byte:	
+_do_byte:	
 	;; copy and checksum the last byte
-	move.b	[r10],r9
-	addu.b	r9,r13
-	move.b	r9,[r11]
+	move.b	[$r10],$r9
+3:	;; A failing userspace access will have this as PC.
+	addu.b	$r9,$r13
+	move.b	$r9,[$r11]
 	ret
-	move.d	r13, r10
-		
-	
\ No newline at end of file
+	move.d	$r13, $r10

Index: dmacopy.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/dmacopy.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2

Index: memset.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/memset.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- memset.c	25 Feb 2001 23:15:23 -0000	1.1.1.1
+++ memset.c	9 Apr 2002 17:03:16 -0000	1.2
@@ -27,6 +27,8 @@
 /*#                                                                         */
 /*#-------------------------------------------------------------------------*/
 
+#include <linux/types.h>
+
 /* No, there's no macro saying 12*4, since it is "hard" to get it into
    the asm in a good way.  Thus better to expose the problem everywhere.
    */
@@ -39,7 +41,7 @@
 
 void *memset(void *pdst,
              int c,
-             unsigned int plen)
+             size_t plen)
 {
   /* Ok.  Now we want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this. */
@@ -54,7 +56,12 @@
 
   /* Ugh.  This is fragile at best.  Check with newer GCC releases, if
      they compile cascaded "x |= x << 8" sanely! */
-  __asm__("movu.b %0,r13\n\tlslq 8,r13\n\tmove.b %0,r13\n\tmove.d r13,%0\n\tlslq 16,r13\n\tor.d r13,%0"
+  __asm__("movu.b %0,$r13\n\t"
+          "lslq 8,$r13\n\t"
+	  "move.b %0,$r13\n\t"
+	  "move.d $r13,%0\n\t"
+	  "lslq 16,$r13\n\t"
+	  "or.d $r13,%0"
           : "=r" (lc) : "0" (lc) : "r13");
 
   {
@@ -111,36 +118,36 @@
 	;; Save the registers we'll clobber in the movem process
 	;; on the stack.  Don't mention them to gcc, it will only be
 	;; upset.
-	subq 	11*4,sp
-        movem   r10,[sp]
+	subq 	11*4,$sp
+        movem   $r10,[$sp]
 
-        move.d  r11,r0
-        move.d  r11,r1
-        move.d  r11,r2
-        move.d  r11,r3
-        move.d  r11,r4
-        move.d  r11,r5
-        move.d  r11,r6
-        move.d  r11,r7
-        move.d  r11,r8
-        move.d  r11,r9
-        move.d  r11,r10
+        move.d  $r11,$r0
+        move.d  $r11,$r1
+        move.d  $r11,$r2
+        move.d  $r11,$r3
+        move.d  $r11,$r4
+        move.d  $r11,$r5
+        move.d  $r11,$r6
+        move.d  $r11,$r7
+        move.d  $r11,$r8
+        move.d  $r11,$r9
+        move.d  $r11,$r10
 
         ;; Now we've got this:
 	;; r13 - dst
 	;; r12 - n
 	
         ;; Update n for the first loop
-        subq    12*4,r12
+        subq    12*4,$r12
 0:
-        subq   12*4,r12
+        subq   12*4,$r12
         bge     0b
-	movem	r11,[r13+]
+	movem	$r11,[$r13+]
 
-        addq   12*4,r12  ;; compensate for last loop underflowing n
+        addq   12*4,$r12  ;; compensate for last loop underflowing n
 
 	;; Restore registers from stack
-        movem [sp+],r10" 
+        movem [$sp+],$r10" 
 
      /* Outputs */ : "=r" (dst), "=r" (n)
      /* Inputs */ : "0" (dst), "1" (n), "r" (lc));

Index: old_checksum.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/old_checksum.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2

Index: string.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/string.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- string.c	25 Feb 2001 23:15:23 -0000	1.1.1.1
+++ string.c	9 Apr 2002 17:03:16 -0000	1.2
@@ -31,9 +31,11 @@
 /*#                                                                         */
 /*#-------------------------------------------------------------------------*/
 
+#include <linux/types.h>
+
 void *memcpy(void *pdst,
              const void *psrc,
-             unsigned int pn)
+             size_t pn)
 {
   /* Ok.  Now we want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -100,8 +102,8 @@
         ;;
 	;; Save the registers we'll use in the movem process
 	;; on the stack.
-	subq 	11*4,sp
-	movem	r10,[sp]
+	subq 	11*4,$sp
+	movem	$r10,[$sp]
 
         ;; Now we've got this:
 	;; r11 - src
@@ -109,17 +111,17 @@
 	;; r12 - n
 	
         ;; Update n for the first loop
-        subq    44,r12
+        subq    44,$r12
 0:
-	movem	[r11+],r10
-        subq   44,r12
+	movem	[$r11+],$r10
+        subq   44,$r12
         bge     0b
-	movem	r10,[r13+]
+	movem	$r10,[$r13+]
 
-        addq   44,r12  ;; compensate for last loop underflowing n
+        addq   44,$r12  ;; compensate for last loop underflowing n
 
 	;; Restore registers from stack
-        movem [sp+],r10" 
+        movem [$sp+],$r10" 
 
      /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) 
      /* Inputs */ : "0" (dst), "1" (src), "2" (n));

Index: usercopy.c
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/arch/cris/lib/usercopy.c,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- usercopy.c	25 Feb 2001 23:15:23 -0000	1.1.1.1
+++ usercopy.c	9 Apr 2002 17:03:16 -0000	1.2
@@ -95,8 +95,8 @@
 	;;
 	;; Save the registers we'll use in the movem process
 	;; on the stack.
-	subq	11*4,sp
-	movem	r10,[sp]
+	subq	11*4,$sp
+	movem	$r10,[$sp]
 
 	;; Now we've got this:
 	;; r11 - src
@@ -104,7 +104,7 @@
 	;; r12 - n
 
 	;; Update n for the first loop
-	subq	44,r12
+	subq	44,$r12
 
 ; Since the noted PC of a faulting instruction in a delay-slot of a taken
 ; branch, is that of the branch target, we actually point at the from-movem
@@ -113,15 +113,15 @@
 ; after *that* movem.
 
 0:
-	movem	[r11+],r10
-	subq   44,r12
+	movem	[$r11+],$r10
+	subq   44,$r12
 	bge	0b
-	movem	r10,[r13+]
+	movem	$r10,[$r13+]
 1:
-	addq   44,r12  ;; compensate for last loop underflowing n
+	addq   44,$r12  ;; compensate for last loop underflowing n
 
 	;; Restore registers from stack
-	movem [sp+],r10
+	movem [$sp+],$r10
 2:
 	.section .fixup,\"ax\"
 
@@ -130,14 +130,14 @@
 ; performance penalty for sany use; the program will segfault soon enough.
 
 3:
-	move.d [sp],r10
-	addq 44,r10
-	move.d r10,[sp]
+	move.d [$sp],$r10
+	addq 44,$r10
+	move.d $r10,[$sp]
 	jump 0b
 4:
-	movem [sp+],r10
-	addq 44,r10
-	addq 44,r12
+	movem [$sp+],$r10
+	addq 44,$r10
+	addq 44,$r12
 	jump 2b
 
 	.previous
@@ -255,8 +255,8 @@
 	;;
 	;; Save the registers we'll use in the movem process
 	;; on the stack.
-	subq	11*4,sp
-	movem	r10,[sp]
+	subq	11*4,$sp
+	movem	$r10,[$sp]
 
 	;; Now we've got this:
 	;; r11 - src
@@ -264,42 +264,82 @@
 	;; r12 - n
 
 	;; Update n for the first loop
-	subq	44,r12
+	subq	44,$r12
 0:
-	movem	[r11+],r10
+	movem	[$r11+],$r10
 1:
-	subq   44,r12
+	subq   44,$r12
 	bge	0b
-	movem	r10,[r13+]
-
-	addq   44,r12  ;; compensate for last loop underflowing n
+	movem	$r10,[$r13+]
 
+	addq   44,$r12  ;; compensate for last loop underflowing n
+8:
 	;; Restore registers from stack
-	movem [sp+],r10
+	movem [$sp+],$r10
 
 	.section .fixup,\"ax\"
 
-; To provide a correct count in r10 of bytes that failed to be copied,
-; we jump back into the loop if the loop-branch was taken.
-;  There is no performance penalty; the program will segfault soon
-; enough.
+;; Do not jump back into the loop if we fail.  For some uses, we get a
+;; page fault but for performance reasons we care to not get further
+;; faults.  For example, fs/super.c at one time did
+;;  i = size - copy_from_user((void *)page, data, size);
+;; which would cause repeated faults while clearing the remainder of
+;; the SIZE bytes at PAGE after the first fault.
 
 3:
-	move.d [sp],r10
-	addq 44,r10
-	move.d r10,[sp]
-	clear.d r0
-	clear.d r1
-	clear.d r2
-	clear.d r3
-	clear.d r4
-	clear.d r5
-	clear.d r6
-	clear.d r7
-	clear.d r8
-	clear.d r9
-	clear.d r10
-	jump 1b
+	move.d [$sp],$r10
+
+;; Number of remaining bytes, cleared but not copied, is r12 + 44.
+
+	add.d $r12,$r10
+	addq 44,$r10
+
+	move.d $r10,[$sp]
+	clear.d $r0
+	clear.d $r1
+	clear.d $r2
+	clear.d $r3
+	clear.d $r4
+	clear.d $r5
+	clear.d $r6
+	clear.d $r7
+	clear.d $r8
+	clear.d $r9
+	clear.d $r10
+
+;; Perform clear similar to the copy-loop.
+
+4:
+	subq 44,$r12
+	bge 4b
+	movem $r10,[$r13+]
+
+;; Clear by four for the remaining multiples.
+
+	addq 40,$r12
+	bmi 6f
+	nop
+5:
+	subq 4,$r12
+	bpl 5b
+	clear.d [$r13+]
+6:
+	addq 4,$r12
+	beq 7f
+	nop
+
+	subq 1,$r12
+	beq 7f
+	clear.b [$r13+]
+
+	subq 1,$r12
+	beq 7f
+	clear.b [$r13+]
+
+	clear.d $r12
+	clear.b [$r13+]
+7:
+	jump 8b
 
 	.previous
 	.section __ex_table,\"a\"
@@ -411,50 +451,50 @@
 	;; Save the registers we'll clobber in the movem process
 	;; on the stack.  Don't mention them to gcc, it will only be
 	;; upset.
-	subq	11*4,sp
-	movem	r10,[sp]
+	subq	11*4,$sp
+	movem	$r10,[$sp]
 
-	clear.d r0
-	clear.d r1
-	clear.d r2
-	clear.d r3
-	clear.d r4
-	clear.d r5
-	clear.d r6
-	clear.d r7
-	clear.d r8
-	clear.d r9
-	clear.d r10
-	clear.d r11
+	clear.d $r0
+	clear.d $r1
+	clear.d $r2
+	clear.d $r3
+	clear.d $r4
+	clear.d $r5
+	clear.d $r6
+	clear.d $r7
+	clear.d $r8
+	clear.d $r9
+	clear.d $r10
+	clear.d $r11
 
 	;; Now we've got this:
 	;; r13 - dst
 	;; r12 - n
 
 	;; Update n for the first loop
-	subq	12*4,r12
+	subq	12*4,$r12
 0:
-	subq   12*4,r12
+	subq   12*4,$r12
 	bge	0b
-	movem	r11,[r13+]
+	movem	$r11,[$r13+]
 1:
-	addq   12*4,r12	 ;; compensate for last loop underflowing n
+	addq   12*4,$r12        ;; compensate for last loop underflowing n
 
 	;; Restore registers from stack
-	movem [sp+],r10
+	movem [$sp+],$r10
 2:
 	.section .fixup,\"ax\"
 3:
-	move.d [sp],r10
-	addq 12*4,r10
-	move.d r10,[sp]
-	clear.d r10
+	move.d [$sp],$r10
+	addq 12*4,$r10
+	move.d $r10,[$sp]
+	clear.d $r10
 	jump 0b
 
 4:
-	movem [sp+],r10
-	addq 12*4,r10
-	addq 12*4,r12
+	movem [$sp+],$r10
+	addq 12*4,$r10
+	addq 12*4,$r12
 	jump 2b
 
 	.previous





[LV-kern-commit] CVS: kernel-2.4/arch/cris/lib csumcpfruser.S,NONE,1.1 dram_init.S,NONE,1.1 hw_setti

[LV-kern-commit] CVS: kernel-2.4/arch/cris/lib csumcpfruser.S,NONE,1.1 dram_init.S,NONE,1.1 hw_settings.S,NONE,1.1 Makefile,1.1.1.1,1.2 checksum.S,1.1.1.1,1.2 checksumcopy.S,1.1.1.1,1.2 dmacopy.c,1.1.1.1,1.2 memset.c,1.1.1.1,1.2 old_checksum.c,1.1.1.1,1.2 string.c,1.1.1.1,1.2 usercopy.c,1.1.1.1,1.2